'Multiprocessing, missing 1 required positional argument: 'response'

I don't have really understood what happened. I was executing this code, a moment ago it works and then it returns an error.

EDITED The code takes from euronext.index() a list of 1700 (more or less) indexes. I think that the problem is on the lenght of the list: with small numbers (less then 60) it works well. When I use the entire list, it outputs that error. (I run it from Windows). TrendReq is a module python -m pip install pytrends that downloads google trends data.

import pandas as pd
import numpy as np
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
from pytrends.request import TrendReq
import requests
from bs4 import BeautifulSoup

def index():
    url = 'https://live.euronext.com/en/pd/data/stocks'

    params = {
       "mics": "ALXB,ALXL,ALXP,XPAR,XAMS,XBRU,XLIS,XMLI,MLXB,ENXB,ENXL,TNLA,TNLB,XLDN,XESM,XMSM,XATL,VPXB,XOSL,XOAS,MERK",
       "display_datapoints": "dp_stocks",
       "display_filters": "df_stocks"
    }
    data = {
        "draw": "1",
        "columns[0][data]": "0",
        "columns[0][name]": "",
        "columns[0][searchable]": "true",
        "columns[0][orderable]": "true",
        "columns[0][search][value]": "",
        "columns[0][search][regex]": "false",
        "columns[1][data]": "1",
        "columns[1][name]": "",
        "columns[1][searchable]": "true",
        "columns[1][orderable]": "false",
        "columns[1][search][value]": "",
        "columns[1][search][regex]": "false",
        "columns[2][data]": "2",
        "columns[2][name]": "",
        "columns[2][searchable]": "true",
        "columns[2][orderable]": "false",
        "columns[2][search][value]": "",
        "columns[2][search][regex]": "false",
        "columns[3][data]": "3",
        "columns[3][name]": "",
        "columns[3][searchable]": "true",
        "columns[3][orderable]": "false",
        "columns[3][search][value]": "",
        "columns[3][search][regex]": "false",
        "columns[4][data]": "4",
        "columns[4][name]": "",
        "columns[4][searchable]": "true",
        "columns[4][orderable]": "false",
        "columns[4][search][value]": "",
        "columns[4][search][regex]": "false",
        "columns[5][data]": "5",
        "columns[5][name]": "",
        "columns[5][searchable]": "true",
        "columns[5][orderable]": "false",
        "columns[5][search][value]": "",
        "columns[5][search][regex]": "false",
        "columns[6][data]": "6",
        "columns[6][name]": "",
        "columns[6][searchable]": "true",
        "columns[6][orderable]": "false",
        "columns[6][search][value]": "",
        "columns[6][search][regex]": "false",
        "order[0][column]": "0",
        "order[0][dir]": "asc",
        "start": "0",
        "length": "100",
        "search[value]": "",
        "search[regex]": "false",
        "iDisplayLength": "2000",
        "iDisplayStart": "0",
        "sSortDir_0": "asc"
    }
    r = requests.post(url, params=params, data=data).json()
    allin = {'Names': []}
    for x in r['aaData']:
        soup = BeautifulSoup(x[0], 'lxml')
        allin['Names'].append(soup.a.text)
    df = pd.DataFrame(allin)
    index_list = df["Names"].tolist()

    return index_list

def collecting_data(index):
    pytrend = TrendReq()
    pytrend.build_payload(kw_list=[index])

    interest_over_time_df = pytrend.interest_over_time()
    try:
        interest_over_time_df.columns = ["Score", "isPartial"]
        returned_variable = interest_over_time_df.mean(axis = 0)[0]
    except:
        returned_variable = np.nan

    print("[ ]", index, " - processed")
    return (index, ("Score", returned_variable))



if __name__ == "__main__":
    index_list = index()
    col = ["Score"]
    df = pd.DataFrame(index=index_list, columns=col)

    pool_size = min(mp.cpu_count(), len(index_list))
    pool = mp.Pool(pool_size)

    for result in pool.imap_unordered(collecting_data, index_list):
        index, values = result
        col, value = values
        df.at[index, col] = value

    print(df)
    pool.close()

and it returns:

Exception in thread Thread-3:
Traceback (most recent call last):
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\pool.py", line 576, in _handle_results
task = get()
File "C:\Users\MP\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() missing 1 required positional argument: 'response'

The code doesn't end. It bloks right there.



Solution 1:[1]

I switched to using a thread pool for diagnostics purposes and noticed that I would see:

The request failed: Google returned a response with code 429.

This, I believe, means you are issuing too many requests. There must be some restriction on how many requests you can make per some unit of time. So I reverted back to using a processing pool as before but modified the code as follows to catch the 429 exception. As you can see I am now getting nothing but 429 exceptions since I have probably in testing issued far too many requests. But you will need to research what the restrictions are on making requests (and possibly forgo multiprocessing).

from pytrends.exceptions import ResponseError
...

def collecting_data(index):

    try:
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[index])
        interest_over_time_df = pytrend.interest_over_time()
        interest_over_time_df.columns = ["Score", "isPartial"]
        returned_variable = interest_over_time_df.mean(axis = 0)[0]
    except (Exception, ResponseError) as e:
        print(f'collecting_data got exception for index {index}:', e)
        returned_variable = np.nan

    print("[ ]", index, " - processed")
    return (index, ("Score", returned_variable))

But almost every request ended with the Google 429 error:

collecting_data got exception for index VRANKEN-POMMERY: The request failed: Google returned a response with code 429.
[ ] VRANKEN-POMMERY  - processed
                   Score
1000MERCIS           NaN
2020 BULKERS         NaN
2CRSI                NaN
2MX ORGANIC          NaN
2MX ORGANIC BS       NaN
...                  ...
ZCI LIMITED          NaN
ZENITH ENERGY        NaN
ZENOBE GRAMME CERT   NaN
ZWIPE                NaN
ØRN SOFTWARE HLD.    NaN

[1748 rows x 1 columns]

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Booboo