DATAFRAME TO BIGQUERY - Error: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'

I am uploading a DataFrame to a BigQuery table:

df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')

And I get the following error:

OSError                                   Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
   2624 
-> 2625                     _pandas_helpers.dataframe_to_parquet(
   2626                         dataframe,

~/.local/lib/python3.8/site-packages/google/cloud/bigquery/_pandas_helpers.py in dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression, parquet_use_compliant_nested_type)
    672     arrow_table = dataframe_to_arrow(dataframe, bq_schema)
--> 673     pyarrow.parquet.write_table(
    674         arrow_table,

~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(table, where, row_group_size, version, use_dictionary, compression, write_statistics, use_deprecated_int96_timestamps, coerce_timestamps, allow_truncated_timestamps, data_page_size, flavor, filesystem, compression_level, use_byte_stream_split, column_encoding, data_page_version, use_compliant_nested_type, **kwargs)
   2091                 **kwargs) as writer:
-> 2092             writer.write_table(table, row_group_size=row_group_size)
   2093     except Exception:

~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(self, table, row_group_size)
    753 
--> 754         self.writer.write_table(table, row_group_size=row_group_size)
    755 

~/.local/lib/python3.8/site-packages/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetWriter.write_table()

~/.local/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()

OSError: [Errno 28] Error writing bytes to file. Detail: [errno 28] No space left on device

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-8-f7137c1f7ee8> in <module>
     62 )
     63 
---> 64 df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')

~/.local/lib/python3.8/site-packages/pandas/core/frame.py in to_gbq(self, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
   2052         from pandas.io import gbq
   2053 
-> 2054         gbq.to_gbq(
   2055             self,
   2056             destination_table,

~/.local/lib/python3.8/site-packages/pandas/io/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
    210 ) -> None:
    211     pandas_gbq = _try_import()
--> 212     pandas_gbq.to_gbq(
    213         dataframe,
    214         destination_table,

~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials, api_method, verbose, private_key)
   1191         return
   1192 
-> 1193     connector.load_data(
   1194         dataframe,
   1195         destination_table_ref,

~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in load_data(self, dataframe, destination_table_ref, chunksize, schema, progress_bar, api_method, billing_project)
    584 
    585         try:
--> 586             chunks = load.load_chunks(
    587                 self.client,
    588                 dataframe,

~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_chunks(client, dataframe, destination_table_ref, chunksize, schema, location, api_method, billing_project)
    235 ):
    236     if api_method == "load_parquet":
--> 237         load_parquet(
    238             client,
    239             dataframe,

~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_parquet(client, dataframe, destination_table_ref, location, schema, billing_project)
    127 
    128     try:
--> 129         client.load_table_from_dataframe(
    130             dataframe,
    131             destination_table_ref,

~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
   2670 
   2671         finally:
-> 2672             os.remove(tmppath)
   2673 
   2674     def load_table_from_json(

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'

Is there a solution, please?



Solution 1:[1]

As Ricco D has mentioned, when writing the dataframe to the table, the BigQuery client first creates a temporary file on the host system and removes it once the dataframe has been loaded (see the client's source code for reference). The relevant code chunk performs the following operations (a simplified sketch follows the list):

  1. Create a temporary file
  2. Load the temporary file into the table
  3. Delete the file after loading.
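Roughly, the pattern looks like the sketch below. This is a simplified illustration, not the client's actual implementation; the helper name load_dataframe_via_tempfile is made up, and the only BigQuery API it assumes is the public Client.load_table_from_file method.

import os
import tempfile
import uuid

import pandas as pd


def load_dataframe_via_tempfile(client, dataframe: pd.DataFrame, destination: str) -> None:
    # Step 1: pick a temporary path on the host filesystem (usually /tmp) and
    # serialize the DataFrame to Parquet. This write is where the
    # "OSError: [Errno 28] No space left on device" in the traceback is raised.
    tmppath = os.path.join(tempfile.gettempdir(), f"job_{uuid.uuid4().hex[:8]}.parquet")
    try:
        dataframe.to_parquet(tmppath)

        # Step 2: load the temporary Parquet file into the destination table.
        with open(tmppath, "rb") as source_file:
            job = client.load_table_from_file(source_file, destination)
        job.result()  # wait for the load job to complete
    finally:
        # Step 3: always delete the temporary file. If step 1 failed before the
        # file existed on disk, this cleanup raises the secondary
        # FileNotFoundError shown at the bottom of the traceback.
        os.remove(tmppath)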

The root cause is in the first step: writing the temporary Parquet file fails with OSError: [Errno 28] No space left on device because the host filesystem is full. The FileNotFoundError you see is secondary: while handling that error, the cleanup in step 3 (the finally block) tries to delete a temporary file that was never fully created. So free up space on the host system, for example by deleting unused files, so that the client can create its temporary files.
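To confirm the diagnosis, you can check how much space is left on the filesystem that backs the temporary directory the client writes to. This sketch only uses the standard library:

import shutil
import tempfile

tmpdir = tempfile.gettempdir()  # usually /tmp
usage = shutil.disk_usage(tmpdir)
print(f"{tmpdir}: {usage.free / 1e9:.2f} GB free of {usage.total / 1e9:.2f} GB")

If /tmp itself is simply too small for the Parquet file (for example, a small tmpfs), another option is to set the TMPDIR environment variable to a directory on a larger disk before starting Python, since tempfile.gettempdir() honors it.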

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1: Kabilan Mohanraj