DATAFRAME TO BIGQUERY - Error: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'
I am uploading a dataframe to a BigQuery table:
df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')
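For reference, the surrounding setup looks roughly like this (the project ID, key-file path and dataframe contents below are placeholders for values not shown here):

import pandas as pd
from google.oauth2 import service_account

BQ_PROJECT_ID = "my-project"  # placeholder project ID
# Assumption: credentials are loaded from a service-account key file
credentials = service_account.Credentials.from_service_account_file("service_account.json")

# Stand-in dataframe; the real one is built earlier in the notebook
df = pd.DataFrame({"report_date": ["2022-01-01"], "value": [1]})

df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')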
And I get the following error:
OSError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
2624
-> 2625 _pandas_helpers.dataframe_to_parquet(
2626 dataframe,
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/_pandas_helpers.py in dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression, parquet_use_compliant_nested_type)
672 arrow_table = dataframe_to_arrow(dataframe, bq_schema)
--> 673 pyarrow.parquet.write_table(
674 arrow_table,
~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(table, where, row_group_size, version, use_dictionary, compression, write_statistics, use_deprecated_int96_timestamps, coerce_timestamps, allow_truncated_timestamps, data_page_size, flavor, filesystem, compression_level, use_byte_stream_split, column_encoding, data_page_version, use_compliant_nested_type, **kwargs)
2091 **kwargs) as writer:
-> 2092 writer.write_table(table, row_group_size=row_group_size)
2093 except Exception:
~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(self, table, row_group_size)
753
--> 754 self.writer.write_table(table, row_group_size=row_group_size)
755
~/.local/lib/python3.8/site-packages/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetWriter.write_table()
~/.local/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
OSError: [Errno 28] Error writing bytes to file. Detail: [errno 28] No space left on device
During handling of the above exception, another exception occurred:
FileNotFoundError Traceback (most recent call last)
<ipython-input-8-f7137c1f7ee8> in <module>
62 )
63
---> 64 df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')
~/.local/lib/python3.8/site-packages/pandas/core/frame.py in to_gbq(self, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
2052 from pandas.io import gbq
2053
-> 2054 gbq.to_gbq(
2055 self,
2056 destination_table,
~/.local/lib/python3.8/site-packages/pandas/io/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
210 ) -> None:
211 pandas_gbq = _try_import()
--> 212 pandas_gbq.to_gbq(
213 dataframe,
214 destination_table,
~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials, api_method, verbose, private_key)
1191 return
1192
-> 1193 connector.load_data(
1194 dataframe,
1195 destination_table_ref,
~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in load_data(self, dataframe, destination_table_ref, chunksize, schema, progress_bar, api_method, billing_project)
584
585 try:
--> 586 chunks = load.load_chunks(
587 self.client,
588 dataframe,
~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_chunks(client, dataframe, destination_table_ref, chunksize, schema, location, api_method, billing_project)
235 ):
236 if api_method == "load_parquet":
--> 237 load_parquet(
238 client,
239 dataframe,
~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_parquet(client, dataframe, destination_table_ref, location, schema, billing_project)
127
128 try:
--> 129 client.load_table_from_dataframe(
130 dataframe,
131 destination_table_ref,
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
2670
2671 finally:
-> 2672 os.remove(tmppath)
2673
2674 def load_table_from_json(
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'
A solution, please.
Solution 1:[1]
As Ricco D has mentioned, when writing the dataframe to the table, the BigQuery client creates a temporary file on the host system and removes it once the dataframe has been loaded. You can see this in the client's source code, in the load_table_from_dataframe method of google/cloud/bigquery/client.py (the method at the top of the traceback). That code does the following (a simplified sketch follows the list):
- Creates a temporary file
- Loads the temporary file into the table
- Deletes the file after loading
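A minimal sketch of that sequence, assuming pandas, pyarrow and google-cloud-bigquery are installed (the helper name below is illustrative, not the client's actual internal API):

import os
import tempfile

import pyarrow
import pyarrow.parquet
from google.cloud import bigquery


def load_dataframe_via_parquet(client, dataframe, destination):
    # 1. Create a temporary file and serialize the dataframe to it as Parquet.
    #    This write is where "[Errno 28] No space left on device" is raised.
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".parquet")
    os.close(tmp_fd)
    try:
        arrow_table = pyarrow.Table.from_pandas(dataframe)
        pyarrow.parquet.write_table(arrow_table, tmp_path)

        # 2. Load the temporary file into the destination table.
        job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)
        with open(tmp_path, "rb") as source_file:
            client.load_table_from_file(source_file, destination, job_config=job_config).result()
    finally:
        # 3. Delete the file after loading. When step 1 fails, the half-written
        #    file can already be gone, which is what surfaces as FileNotFoundError.
        os.remove(tmp_path)

Here client would be a bigquery.Client instance and destination a table reference such as 'my-project.Deduplic.DailyReport'.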
The FileNotFoundError at the bottom of the traceback is only a follow-on failure from the cleanup in step 3; the real error is the earlier OSError: [Errno 28] No space left on device, raised while the client writes the temporary Parquet file in step 1. There is not enough free space for the BigQuery client to create the temporary file, so delete unused files from the host system (in particular, free up space on the filesystem that holds /tmp) and retry the upload.
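To check how much space is actually available before and after cleaning up, you can inspect the filesystem that holds /tmp from Python's standard library:

import shutil

usage = shutil.disk_usage("/tmp")  # where the temporary Parquet file is written
print(f"/tmp free: {usage.free / 1024**3:.2f} GiB of {usage.total / 1024**3:.2f} GiB")

The same information is available from a shell with df -h /tmp.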
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source
---|---
Solution 1 | Kabilan Mohanraj