'How to deal with __EVENTVALIDATION in asp.net web scraping?
I am dealing with an asp.net page to scrape a table with all rows and all columns, finally converting the scraped data into pandas dataframe.
scrape URL: http://164.100.47.194/Loksabha/Legislation/NewAdvsearch.aspx
configuration :
total bills : 8946
The page looks like this:
Since it's an asp.net page, I used the cookies and all to fetch all data, here is my code:
import requests
from urllib import request
from bs4 import BeautifulSoup
import io
import pandas as pd
from tqdm import tqdm
import requests
def get_res(m_no = 349):
cookies = {
'ASP.NET_SessionId': 'kardiuqwo0uwtujdolqgad1w',
'BNES_ASP.NET_SessionId': 'ItVZt0BeJnb2+gWjdTq7zU5vFrp8Ms0fM9OVEFmTX8eqFxJq469Ts1hO3xbbkLa6PCxSq+jxzbM0s5S4IFhfMD497glM+D7fFOshSi5cC3PoSM8En9W87A==',
}
headers = {
'Connection': 'keep-alive',
'Cache-Control': 'max-age=0',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Mobile Safari/537.36',
'Origin': 'http://164.100.47.194',
'Content-Type': 'application/x-www-form-urlencoded',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Referer': 'http://164.100.47.194/Loksabha/Legislation/NewAdvsearch.aspx',
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
}
data = {
'__EVENTTARGET': 'ctl00$ContentPlaceHolder1$GR1',
'__EVENTARGUMENT': f'Page${m_no}',
'__LASTFOCUS': '',
'__VIEWSTATE': '7aYz0DTI41EdOkRTXKaokkalAlKIRA2Yn..,
'__VIEWSTATEGENERATOR': 'FB1370BC',
'__VIEWSTATEENCRYPTED': '',
'__EVENTVALIDATION': '+og7lwBVNlGN1QWH9gZLzCX+nqWMEivwKlanLpQbCjkjyuBDEgOQCxvNrCInJdJaZa5kCbxNfYRhYlZaeycwqbrEMJ99Ggm3VR8zEtyCI0PgWWFFI/TwPCW9UeuiMf1qsvtrXiN7MOaOvBJe1sdeHMq+d0R64ljxEXTtEvTMKu6RMQxoCPGviHWy0XySK0uHU1IiS83YIGPr8AJ2uctPf15ADdJriWCxQChdhVmB8hfu3/+gA82WpEE7/XQKI2C7I8PwEUUHN3UD5n+R718zAyhBwcg3iMEAz8VXKiqTqEbEalPsUEKApRjdAGMoODbpkrvOUyGyrHyljRr8texSaMwAleO1eR0puFu348QzRXmF08FAEp4gKGwFKAuAJwNInaceisGUYp2x+4Mcwh8I9OTv6UIf2co/VnDcNInMO6rhBskl0anm/Qw6vD0RwsnQI5ikq/N3vlymoCUW/X8vNZxeLIt1PNolXrXnVMgPRKmolrkVMGGef709vc1DEw6+muqD66YA8gGwN4S00vi4xgL3qHq7BB24KfZCIO/Hom+eA05TEW/1vh0nzqm0Z1RZV48XQR0+nQrBp7COnRZeXy3A6QL4kwXr8WDhTVHQYcunP/LIBE4Rzy0klaW9aDwSCbvKUh7LUmR7OH2wKeUZxf3h18etyTGCBqVTOp4pd9eBfYrs7V6ta34dp7kd4bib64v0y9vdFwPB6r3OAOtUb5ANYW/0egj3s+F5kiSF3FLaZugzW0A5f5o8xA8V6b+xdYqTJ1mVdIf0YuP2qpbwjzycBRl0Kybo0/dT3xe9z2dZtcGQUGpEy9mUjIcHii21jna7Os7Ci55svErDz7ifFcQu/+j7svCFjWQFInooKDSgjCsN4Lc5Nf8W6wqMVnvKYsaWKaK2Hp5tdULiNBDFnHB90QphddvDidOKwqqvY0HU8Fz+dbGEWbTTmHMrmRMXFY75W2hid8F0G6Bihapo3RBZ/L7XgIDOEJY3tdflc6FIQGBCC/L96MBMoJsNZeQUucOhcewRv9g3pcqEDkOAKHQivJQZw55TvtJ8LmsDf76/bun7Nx/tCzDgohQTl6HohGeDAMg5wdcxShrGZYY2qWov6seC7L59I44XLilYTM4p5yxBz+gQh//FHHQiyujZhGi2GNVC4GQQer+jiKnkrg8tMeOvGp6obNXI/KMPvB7oA77MjNawokMHjWWn4ygEgbAMacwgE7H7DqRQYAB7wTb1QrC4weLKCWPuzNzOJXGJwvsWrDKTLrPem7lLBU8tJ8WFgj3deSNwiEoGLy+WXkaAhnZ2fFfh6lj2GvUDeuq1tH+VGQXYg+VB/Xb4HV3iln2dFgxsfmbm3EBfJ3SRM+vF6pYRLl0UtXe+52lL076SyzvPo0ODCqjH5wfD70H9u5xoTIOPhCjHEo6KU2xvQvy8R70jAuvPJZXysfkllCaVCigy0thyDhbQhBUXsw0v0IX0w4bRkV8zvnRupid5VVjBksQwC/SCbPQ94+SeJUAUc7MYWBH4e8zggA1Nv/gPZ3bUv+Y6RZYkPDFjmKifEK1WxjOXmMm+dI//Jn+CFcTDCAyBneTtTkDEGioGheb4bbMjnSHGKJ4aTfGOj4CGi0Quilwybo0h5yAzilwEg6sB0oSPyNF8AfUcXXg2P4o/5mmTEstQFeutYVLrH40lz45oA507fv7aGAgS6lWQXd5lkggy+8gwyDs9lamvyiO6+XZdUIE6AAV7ZCcJskbp62qaPmH2HVEu3HiLlnhC2mkoEAJsTQ8OlT1kZfzt1Xdt7WsV6CNt0eaPfdw78lggOjvVHHT08Kh78HJD1vzhDfAzXWXZgw58xcqYgsYaKtXcVTyMv5ZMBesDtLsmvNIavoZlMkqQJFA8NhSB1gI9ME4ESZhV5Oh/4whkGXRdRv6+pkGiClTIS9pGGGDvgC7u5zF7hzQXOmrdpbwQRSIbQgadRka8RpsdyhrPbHSRZdFef1E1yYbk9FvY89vNCftjxkYS08tHhyVHFArxPTD5ON1n13MRW//xp/LUHNCbzZ6Sy/8zAtB8IWI57EC/4tQZ7mopb0AKNjbEasTKthk3T9nH26MBOYpuFTg2nJpBOFEUPr9AfwmuNoNqMGvCwyJwsZwPP3+ywxXFk99Ji5D8CboGw7pWlCYlSM+jYuDqFOE69M3iUl0a8P6CoWL+P3fSAFILyqZC+58Fy3+ncyBr2Xix78SbbDhTx1DGopD0QVrydFufC8hjeYULnqiwPiTWB4IXa13bxJW7dgji6nljFoFpnGPz4UzgeXOiZySxsBAcKri3uCljbhlnXztcrZTBRB9nMISXj/XFk8PFEgwfhKzYrWAzPrmD/48Wd1C2o/dj0FtYRmsjjKPlnRu/qr0i20KOtmdYmOS0MrIjX4C02g/9r/Abm6KgGoszBuNVBnnN1HMgLNADJCCiVIRghb/vqPlyozaulFqzg0md468NAxQE8Gcwkc+2rEJ2LgbLXNn38FcIVKiIA2233LYj13E0rxMUT2Fp+UhmxS4Umr7rPmiLv7RjJOz497x/ai9VJjt0c4SfUxSzwesEyMqCHJkrqUUlpA+CbIE2iM1YJ7y485ZZPQUbqo6LddDSAwAHSkUURhG9domV0QGiXwLNmecenLd5AX48Rvj3SMb9cPxXox8e1k7e+yJtmUIp56a0tpbUdOwLtfon6Meu6XznF8RKQbe2exVppbCl63N24EFXo+BvJA071zwF2aOxG43dC+VIHN6JnJg0ZGBQTMxcXtWDGLBp8wiuEzSYTAHxVRXD+XOD2OWP0uY1k/Z4RBosqHd0B/3FxTb2yB6ZUCRcfMyuOCAv/yTRmedncDyuomL5FJ+qUxLDdyDt7Fc4v4EUQqmrCLvyY7v6II/uPIwofIWJl3FyFgrfA60OEOnjxzVsvyOrUUNAz84I0kZy0iqxhnUwjbg2/QwtsUTeuTpwta9aYGhZI3gcBCaE1DXGcSCagmO1ubYC4ptOX0Xmcq0Fo1mYQtWjfC/AoRDUvbA1fLguL2HR89CmsJKGuMX4s0RoMo8yoj8CYNP8X6op77P8lmdCqfS2imVsnB3YdDkrUaXovoUwbxwPJSWF2pq1dODM+6PJMVZjhJd/AQCaUd+cpDGqBAwIdztlTMirvJMa+N8hdYIz+FveLPZkicYHSk+85PNW4SJXIVN+ARLnYlSnQTmfLM3dsS80F/BqYB5k/bIaKX3o/Rh3JGl6PMfVjifIHUbMHIfUCdDFW41omwB2UwZ7dJvAr/Osu7Ymz8OdVal1lnNF3dMtqDSRarkVfCoc6YbvZ+b1KdbTe5C0MsVPCsEPX8B0HWqcdZK3LXkurm0KhPDaolBgtBWx183hzDGfF9/XV6XNtalf6a8ki+pQ0FcPWUUabsvJHfV1YC4q4ULrpX491LiAzTFYmDak0FoYzcPfny9tom01ASM7FeY6JP+GHet6luWMFcnGmpTYX9Bol8H0r5BC8NOixaT3kQN6pszkfXIqsrrMGK4/juC1L3/iMbhs/lQlauKEKG35+dbprgjjYJOVEvb7DK2CkOl7Fd31jD7X71qEqbTm7CVc7ObLY4pEy9wWx9a8ll6CpYQG+Dd34xVXBC8qmh4/6ayswSCxNCqNQOlKI4RuOYOjYWV95kSIcDhXKEui6dauGyZveHWhN3P0tSx9UTbCRyefNfpqd0gieJwZKXmFcGiWxPRb1Jnup+F4+RbkhlXcwR02oafW0Iag8Kr4eRAQ1awSDNKvko++/l4DCPLcGOb6nKM/Himv3VxfEy8cFa0Ph8VFb9nODzjAIOYryOZx5HCg6jBo0jN5qWmKAyXJpng4MibXtAg9j89vmfdfYV1qyGosDSNvaDFrYRlyi7Wn0SViNnmwHaJmVI0AhrYqHkS5CLVTdL61A4l7QNDJA+72Sx7AiFeWp8rGiScRyMtjGLHxgIGsu3MmdUovIWNspsymh3KRpyLC5j8qSryrRNCtebLL1Wmpzy7WhStpNvupW9KhuOeOp8KF2oB32WQW04qRO7TQV5qHAJImR2omI/iJCeXboL1vpE601MXxbBw1nRXBORh4j2ZaiO/uKIC2nVf1igptUKGOl13MDuHagrfwaoIsuw4aQyXaLVPrwyYQcr8TNLjQzX5Iu2iGU5fsDvRjTusRCnrh33bIgCboYky334h1MBxoVLlJcDEoUkferqtkeeYm+TN2odMf/sNJqoEEjQtyTLeQG7mALxEXwO9BoRF83Ore0eGzihe51Wq0G54L7UaB/gYKSpPhE+I+9GvidcR/33rscGvEzs2iPWZ0pvFp2kwFNAz9L4PBSviHQpTEcu1oZAo+N3h0aMJoVOkTrQvU1/2ZJ7cGY/DhJ1RAFGCtFKeQw5pvf6biGRlcLkc4SZ4/pYt7dZu+Yw/xnbe95iwWLn/Zt013zzZSxwar1B8DOL+BFTko2sExJwiUDDOxl5gYbY6FI17O2hCJJCwGJszfperZZwmo+AyXBssyfWwLdtxW8omQQ+KYYD8QDeixfkCSbXNptWbYyEQeHLXXzq9JjlP4IJXiq546MzrY+BMJf5NjqgOeXnRrqLlQ9CiSb9GA3wU5xlNAOi7zvHnm/GjCRV7og2JKMmHzMWkTGKpAhzLjbqTnXuFdxNuzmhlX70ju/g89VBM3TWsLqncBtR9h0qOT6NoLbVRcpLKBqLg4u8iIXa7rlH67Si4oUsovQ7X//0KTzYLxk7AkRfFAsTb5hnaQUKSmT0vb/4UuY+ur/ou8vuaKiOcbWGCjjoa6Y0jUJjwj6teWV78q1qsbhx8ZcKqjBsy+PUld9/BpOxH1ENBWC4MZTuFmLbv9ObanJ6wyT+gSE6HV2gtnkbo2zWDnmi2iwx9QRF0eVeYiZjxR++Rhf3EQHrWr0/UGM5FtkSY8ZQqPrgrj6fJnrA1GsG78UF7+pUc6ZdgijyclqYE10MYCHbhhKBuD99sTV5+EIHXM9ajTelT1yrSpiUo1+N620iM5H5dhC+ucLVc8QZwZ1JfJB8pFwjuNUzd+iI+BkG3AWHmnfzhTZrC0at/pnjC29GimsMUoWQistGyQWNmpfj+5ZRMhwsPHvFkHzfBi5uw4vf/3gc0HSa+eAFo2G74aVAuz5aiWEZ8Qk7bDcQlZNVdzcExwE4CQfBbjxNDQxMxzU7sgzCEKsklaKlTr+CEdLz96RLLn6qudEAsmWt36r3LpAivvFxnpD3GdbdOP8BzDfXIHWUOoRn7q6O/+wW/jLnOoq4FXCOuHDAM2YlqawFFYnlYK67ZQ+71ciYgXzO0aZl6PNv4blXUeDBKMBtHo5AruEtFmLLriO1fBdNmBEOP0o/Iz6TwaG+KpxPxDhsg5nWIuwCixRpjkomMjyJidU3g3VHFiUHS4WQVXvNSvb/f8XDAX5BFRtNTL3PBumZPA1AGDOFI5WWelbiRpmAaHnEU2IZp2ep3z912Q70+bA8uKhS+a0YUFAxudnrkMq80jZjHbrNQ8JcdEjdPvAFLJrSSUoSb8/G1dYGPHnXvY+qhHS+Eli+AnMY/oZAMS7um3W7da+MyVj0dSj3YLt3DjYgMfOqwzmUPAqWGkn/JHJ1XiycZsiPmP3aHfkaKtUAnWhqFPtCmheh0OgEzqB0BJ2ztiudxw4kiDnh5JUTVZwc8nrPzq6pVgTtpElewHTWORE12Kya9rBcQv+n+uBSrtUIPuwAxDIdIBmySbnktObUP1x14umuhxMlxA727DNqsqvxLARzl2XGR3e//MqqgP',
'ctl00$txtSearchGlobal': '',
'ctl00$ContentPlaceHolder1$RadioBttnhouse': 'both',
'ctl00$ContentPlaceHolder1$RadioBttnbilltyp': 'All',
'ctl00$ContentPlaceHolder1$RadioButtonList1': '6',
'ctl00$ContentPlaceHolder1$STitle': '',
'ctl00$ContentPlaceHolder1$ddlMember': '--- Please Select ---',
'ctl00$ContentPlaceHolder1$RadioBttnmember': 'Current',
'ctl00$ContentPlaceHolder1$ddlMinistry': '--- Please Select ---',
'ctl00$ContentPlaceHolder1$ddlCategory': '--- Please Select ---',
'ctl00$ContentPlaceHolder1$ddlYear1': 'Select',
'ctl00$ContentPlaceHolder1$ddlYear2': 'Select',
'ctl00$ContentPlaceHolder1$ddlfile': '.xls'
}
response = requests.post('http://164.100.47.194/Loksabha/Legislation/NewAdvsearch.aspx', headers=headers, cookies=cookies, data=data, verify=False)
soup = BeautifulSoup(response.text)
tables = soup.findChildren('table')
return tables
def get_all_bills(html):
soup = BeautifulSoup(html)
links = soup.findAll('a')
return links
def single_table_values(row_id):
df_values = {'Year': [], 'Bill No': [], 'Short Title': [],
'Type': [], 'Member': [], 'Date of Introduction / House': [],
'Debate / Passed in LS': [],
'Debate / Passed in RS': [],
'Referred to Committee / Report Presented': [],
'Assent Date/ Gazette Notification / Act No.': [], 'Synopsis': [], 'Status': [], 'link': []}
col_seq = {0: 'Year', 1: 'Bill No', 2: 'Short Title', 3: 'Type', 4: 'Member',
5: 'Date of Introduction / House', 6: 'Debate / Passed in LS',
7: 'Debate / Passed in RS',
8: 'Referred to Committee / Report Presented',
9: 'Assent Date/ Gazette Notification / Act No.',
10: 'Synopsis', 11: 'Status', 12: 'link'}
data = row_id.findChildren('td')
for value in range(len(data)):
try:
if value == 2:
try:
df_values['link'].append(data[value].find_all('a')[1]['href'])
except Exception as e:
df_values['link'].append('no_link')
df_values['Short Title'].append(data[value].text)
else:
df_values[col_seq[value]].append(data[value].text.strip())
except Exception as e:
pass
try:
df = pd.DataFrame(df_values)
except Exception as e:
df_values = {'Year': [], 'Bill No': [], 'Short Title': [],
'Type': [], 'Member': [], 'Date of Introduction / House': [],
'Debate / Passed in LS': [],
'Debate / Passed in RS': [],
'Referred to Committee / Report Presented': [],
'Assent Date/ Gazette Notification / Act No.': [], 'Synopsis': [], 'Status': [], 'link': []}
df = pd.DataFrame(df_values)
return df
The problem is above code works for page no 349:
mj = get_res(m_no = 349)
rows = mj[7].findChildren(['th', 'tr'])
single_table_values(rows[17])
the result looks like this:
But if I use some cookies and code for another page such as 3,4 etc then it scrapes no data, but if I change the _EVENTVALIDATION in data
then it works, I want to fetch all pages in the loop, how I can use the same above setup ( cookies, etc ) for all pages?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|