'Multiprocessing, missing 1 required positional argument: 'response'
I don't have really understood what happened. I was executing this code, a moment ago it works and then it returns an error.
EDITED
The code takes from euronext.index() a list of 1700 (more or less) indexes. I think that the problem is on the lenght of the list: with small numbers (less then 60) it works well. When I use the entire list, it outputs that error. (I run it from Windows).
TrendReq is a module python -m pip install pytrends
that downloads google trends data.
import pandas as pd
import numpy as np
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
from pytrends.request import TrendReq
import requests
from bs4 import BeautifulSoup
def index():
url = 'https://live.euronext.com/en/pd/data/stocks'
params = {
"mics": "ALXB,ALXL,ALXP,XPAR,XAMS,XBRU,XLIS,XMLI,MLXB,ENXB,ENXL,TNLA,TNLB,XLDN,XESM,XMSM,XATL,VPXB,XOSL,XOAS,MERK",
"display_datapoints": "dp_stocks",
"display_filters": "df_stocks"
}
data = {
"draw": "1",
"columns[0][data]": "0",
"columns[0][name]": "",
"columns[0][searchable]": "true",
"columns[0][orderable]": "true",
"columns[0][search][value]": "",
"columns[0][search][regex]": "false",
"columns[1][data]": "1",
"columns[1][name]": "",
"columns[1][searchable]": "true",
"columns[1][orderable]": "false",
"columns[1][search][value]": "",
"columns[1][search][regex]": "false",
"columns[2][data]": "2",
"columns[2][name]": "",
"columns[2][searchable]": "true",
"columns[2][orderable]": "false",
"columns[2][search][value]": "",
"columns[2][search][regex]": "false",
"columns[3][data]": "3",
"columns[3][name]": "",
"columns[3][searchable]": "true",
"columns[3][orderable]": "false",
"columns[3][search][value]": "",
"columns[3][search][regex]": "false",
"columns[4][data]": "4",
"columns[4][name]": "",
"columns[4][searchable]": "true",
"columns[4][orderable]": "false",
"columns[4][search][value]": "",
"columns[4][search][regex]": "false",
"columns[5][data]": "5",
"columns[5][name]": "",
"columns[5][searchable]": "true",
"columns[5][orderable]": "false",
"columns[5][search][value]": "",
"columns[5][search][regex]": "false",
"columns[6][data]": "6",
"columns[6][name]": "",
"columns[6][searchable]": "true",
"columns[6][orderable]": "false",
"columns[6][search][value]": "",
"columns[6][search][regex]": "false",
"order[0][column]": "0",
"order[0][dir]": "asc",
"start": "0",
"length": "100",
"search[value]": "",
"search[regex]": "false",
"iDisplayLength": "2000",
"iDisplayStart": "0",
"sSortDir_0": "asc"
}
r = requests.post(url, params=params, data=data).json()
allin = {'Names': []}
for x in r['aaData']:
soup = BeautifulSoup(x[0], 'lxml')
allin['Names'].append(soup.a.text)
df = pd.DataFrame(allin)
index_list = df["Names"].tolist()
return index_list
def collecting_data(index):
pytrend = TrendReq()
pytrend.build_payload(kw_list=[index])
interest_over_time_df = pytrend.interest_over_time()
try:
interest_over_time_df.columns = ["Score", "isPartial"]
returned_variable = interest_over_time_df.mean(axis = 0)[0]
except:
returned_variable = np.nan
print("[ ]", index, " - processed")
return (index, ("Score", returned_variable))
if __name__ == "__main__":
index_list = index()
col = ["Score"]
df = pd.DataFrame(index=index_list, columns=col)
pool_size = min(mp.cpu_count(), len(index_list))
pool = mp.Pool(pool_size)
for result in pool.imap_unordered(collecting_data, index_list):
index, values = result
col, value = values
df.at[index, col] = value
print(df)
pool.close()
and it returns:
Exception in thread Thread-3:
Traceback (most recent call last):
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\pool.py", line 576, in _handle_results
task = get()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() missing 1 required positional argument: 'response'
The code doesn't end. It bloks right there.
Solution 1:[1]
I switched to using a thread pool for diagnostics purposes and noticed that I would see:
The request failed: Google returned a response with code 429.
This, I believe, means you are issuing too many requests. There must be some restriction on how many requests you can make per some unit of time. So I reverted back to using a processing pool as before but modified the code as follows to catch the 429 exception. As you can see I am now getting nothing but 429 exceptions since I have probably in testing issued far too many requests. But you will need to research what the restrictions are on making requests (and possibly forgo multiprocessing).
from pytrends.exceptions import ResponseError
...
def collecting_data(index):
try:
pytrend = TrendReq()
pytrend.build_payload(kw_list=[index])
interest_over_time_df = pytrend.interest_over_time()
interest_over_time_df.columns = ["Score", "isPartial"]
returned_variable = interest_over_time_df.mean(axis = 0)[0]
except (Exception, ResponseError) as e:
print(f'collecting_data got exception for index {index}:', e)
returned_variable = np.nan
print("[ ]", index, " - processed")
return (index, ("Score", returned_variable))
But almost every request ended with the Google 429 error:
collecting_data got exception for index VRANKEN-POMMERY: The request failed: Google returned a response with code 429.
[ ] VRANKEN-POMMERY - processed
Score
1000MERCIS NaN
2020 BULKERS NaN
2CRSI NaN
2MX ORGANIC NaN
2MX ORGANIC BS NaN
... ...
ZCI LIMITED NaN
ZENITH ENERGY NaN
ZENOBE GRAMME CERT NaN
ZWIPE NaN
ØRN SOFTWARE HLD. NaN
[1748 rows x 1 columns]
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Booboo |