Multiple requests to scrape different pages are giving the same result

I want to scrape the ICC cricket website and find the rankings of batsmen on a particular date. The problem is that the result I'm getting is the same for all dates: the scraping gives me the list of the most recent rankings rather than the rankings on the requested date. The code is given below. Can someone tell me why this is happening, or suggest a solution? I suspect the problem is that BeautifulSoup is not letting the page load completely, which in turn gives false information, since the required data only appears after filters are applied on the website.

from bs4 import BeautifulSoup
import pandas as pd
import requests
import re
import datetime
import os
# Daily dates from the first available ODI ranking date up to today.
date_list = pd.date_range(start="1971-02-01", end=datetime.date.today(), freq="D")


def get_batsmen(date):
    """Return the ODI batting rankings as of *date*.

    Parameters
    ----------
    date : datetime-like (e.g. pandas.Timestamp)
        The historical date whose rankings should be fetched.

    Returns
    -------
    pandas.DataFrame
        A single 'Player Name' column; the top-ranked player (shown in
        the page banner, not in the table) is the first row.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    # BUG FIX: interpolating a Timestamp directly produced
    # "?at=1971-02-01 00:00:00"; the site expects "YYYY-MM-DD" and
    # silently falls back to the *current* rankings on a malformed
    # value — which is why every date returned the same result.
    date_str = pd.Timestamp(date).strftime('%Y-%m-%d')
    url = f'https://www.icc-cricket.com/rankings/mens/player-rankings/odi/batting?at={date_str}'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    doc = BeautifulSoup(response.text, "html.parser")
    player_list = []
    # The #1 player is rendered in a separate banner element.
    find_top = doc.find('div', class_='rankings-block__banner--name-large')
    if find_top is not None:  # guard: banner may be absent for old dates
        player_list.append(find_top.text.strip())
    for item in doc.find_all("td", class_='table-body__cell rankings-table__name name'):
        player_name = item.find("a")
        if player_name is not None:
            player_list.append(player_name.text.strip())
    return pd.DataFrame(player_list, columns=['Player Name'])

def get_bowler(date):
    """Return the ODI bowling rankings as of *date*.

    Parameters
    ----------
    date : datetime-like (e.g. pandas.Timestamp)
        The historical date whose rankings should be fetched.

    Returns
    -------
    pandas.DataFrame
        A single 'Player Name' column; the top-ranked player (shown in
        the page banner, not in the table) is the first row.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    # BUG FIX: interpolating a Timestamp directly produced
    # "?at=1971-02-01 00:00:00"; the site expects "YYYY-MM-DD" and
    # silently falls back to the *current* rankings on a malformed
    # value — which is why every date returned the same result.
    date_str = pd.Timestamp(date).strftime('%Y-%m-%d')
    url = f'https://www.icc-cricket.com/rankings/mens/player-rankings/odi/bowling?at={date_str}'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    doc = BeautifulSoup(response.text, "html.parser")
    player_list = []
    # The #1 player is rendered in a separate banner element.
    find_top = doc.find('div', class_='rankings-block__banner--name-large')
    if find_top is not None:  # guard: banner may be absent for old dates
        player_list.append(find_top.text.strip())
    for item in doc.find_all("td", class_='table-body__cell rankings-table__name name'):
        player_name = item.find("a")
        if player_name is not None:
            player_list.append(player_name.text.strip())
    return pd.DataFrame(player_list, columns=['Player Name'])

def get_allrounder(date):
    """Return the ODI all-rounder rankings as of *date*.

    Parameters
    ----------
    date : datetime-like (e.g. pandas.Timestamp)
        The historical date whose rankings should be fetched.

    Returns
    -------
    pandas.DataFrame
        A single 'Player Name' column; the top-ranked player (shown in
        the page banner, not in the table) is the first row.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    # BUG FIX: interpolating a Timestamp directly produced
    # "?at=1971-02-01 00:00:00"; the site expects "YYYY-MM-DD" and
    # silently falls back to the *current* rankings on a malformed
    # value — which is why every date returned the same result.
    date_str = pd.Timestamp(date).strftime('%Y-%m-%d')
    url = f'https://www.icc-cricket.com/rankings/mens/player-rankings/odi/all-rounder?at={date_str}'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    doc = BeautifulSoup(response.text, "html.parser")
    player_list = []
    # The #1 player is rendered in a separate banner element.
    find_top = doc.find('div', class_='rankings-block__banner--name-large')
    if find_top is not None:  # guard: banner may be absent for old dates
        player_list.append(find_top.text.strip())
    for item in doc.find_all("td", class_='table-body__cell rankings-table__name name'):
        player_name = item.find("a")
        if player_name is not None:
            player_list.append(player_name.text.strip())
    return pd.DataFrame(player_list, columns=['Player Name'])

# Store the data into multiple CSVs, one folder per year/month/day.

BASE_DIR = r'C:\Users\divya\OneDrive\Desktop\8th Sem\ISB assignment'

for date in date_list:
    # os.makedirs creates every missing intermediate directory, and
    # exist_ok=True makes it idempotent — one call replaces the three
    # exists()/makedirs() pairs (which were also race-prone).
    day_dir = os.path.join(BASE_DIR, str(date.year), str(date.month), str(date.day))
    os.makedirs(day_dir, exist_ok=True)
    get_batsmen(date).to_csv(os.path.join(day_dir, 'batsmen.csv'))
    get_bowler(date).to_csv(os.path.join(day_dir, 'bowler.csv'))
    get_allrounder(date).to_csv(os.path.join(day_dir, 'allrounder.csv'))


Solution 1:[1]

If the website you are scraping is interactive, it is worth looking at Selenium as the scraping package instead of bs4, so that JavaScript is actually executed before you read the page.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Arno Maeckelberghe