Scraping the English Vivino.com reviews from the website

I have two questions about scraping information from Vivino.com: 1.) With the code below I can scrape information and reviews from the Vivino website; however, I would like to get the reviews in English or, failing that, only the English reviews. Is there a way to do this? 2.) At the moment I am only scraping wines from Portugal, but I want wines from different countries. If I remove "country_codes[]": "pt", the code no longer works. How can I solve this?

Can someone help me? Thank you very much!

#!/usr/bin/env python
# coding: utf-8

# Import packages
import requests
import json
import pandas as pd

# Get request from the Vivino website
def get_wine_data(wine_id, year, page):
    """Fetch one page of reviews for a wine vintage from the Vivino API.

    Args:
        wine_id: Wine identifier, interpolated into the URL path.
        year: Vintage year, sent as the ``year`` query parameter.
        page: Page number, sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    }

    api_url = f"https://www.vivino.com/api/wines/{wine_id}/reviews?per_page=50&year={year}&page={page}"
    print(api_url)

    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(api_url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error body as data.
    response.raise_for_status()

    return response.json()

# Query the Vivino "explore" endpoint for the first page of matching wines
r = requests.get(
    "https://www.vivino.com/api/explore/explore",
    params={
        # NOTE(review): "country_code" looks like the shopper's country, as
        # opposed to the wine-origin filter "country_codes[]" - confirm.
        "country_code": "FR",
        "country_codes[]": "pt",
        "currency_code": "EUR",
        "grape_filter": "varietal",
        "min_rating": "1",
        "order_by": "price",
        "order": "asc",
        "page": 1,
        "price_range_max": "500",
        "price_range_min": "0",
        "wine_type_ids[]": "1",
    },
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    },
    # Without a timeout a stalled connection would hang the script forever.
    timeout=30,
)
# Stop here on an HTTP error rather than failing later while parsing JSON.
r.raise_for_status()

# Variables to scrape from the Vivino website
def _dig(node, *path):
    """Follow ``path`` (dict keys / list indexes) into ``node``.

    Returns None as soon as a step is missing, the container is None, or a
    list is too short, instead of raising.
    """
    for step in path:
        try:
            node = node[step]
        except (KeyError, IndexError, TypeError):
            return None
    return node


def _wine_row(t):
    """Build one 40-field result tuple from a single explore match ``t``.

    Year, wine ID and wine name are read strictly because later steps key
    on them; every other field becomes None when the match lacks it, so one
    incomplete wine no longer aborts the whole scrape.
    """
    vintage = t["vintage"]
    wine = vintage["wine"]

    row = [
        _dig(wine, "winery", "name"),
        vintage["year"],
        wine["id"],
        f'{wine["name"]} {vintage["year"]}',
        _dig(vintage, "statistics", "ratings_average"),
        _dig(vintage, "statistics", "ratings_count"),
        _dig(t, "prices", 0, "amount"),
        _dig(wine, "style", "acidity"),
        _dig(wine, "style", "blurb"),
        _dig(wine, "style", "body"),
        _dig(wine, "style", "body_description"),
        _dig(wine, "region", "country", "name"),
        _dig(wine, "style", "description"),
    ]

    # food_1 .. food_4
    row.extend(_dig(wine, "style", "food", i, "name") for i in range(4))

    # grape_1 .. grape_3
    # NOTE(review): the first grape is read from style.country while the
    # other two come from region.country; paths kept as in the original -
    # confirm whether this difference is intended.
    row.append(_dig(wine, "style", "country", "most_used_grapes", 0, "name"))
    row.append(_dig(wine, "region", "country", "most_used_grapes", 1, "name"))
    row.append(_dig(wine, "region", "country", "most_used_grapes", 2, "name"))

    # Taste structure scores, in the column order used by the dataframe.
    for key in (
        "acidity",
        "calculated_structure_count",
        "intensity",
        "sweetness",
        "tannin",
        "user_structure_count",
    ):
        row.append(_dig(wine, "taste", "structure", key))

    # flavor_1 .. flavor_7, each followed by its count.
    for i in range(7):
        row.append(_dig(wine, "taste", "flavor", i, "group"))
        row.append(_dig(wine, "taste", "flavor", i, "stats", "count"))

    return tuple(row)


results = [_wine_row(t) for t in r.json()["explore_vintage"]["matches"]]

###
# Dump the raw JSON of the first two matches to disk, one file per wine.
for t in r.json()["explore_vintage"]["matches"][0:2]:
    wine_id = t["vintage"]["wine"]["id"]
    # No trailing comma here: it would turn `year` into a 1-tuple and the
    # file name into "output-wine<id>-year(<year>,).json".
    year = t["vintage"]["year"]

    # Plain write mode is enough; the file is never read back here.
    with open(f'output-wine{wine_id}-year{year}.json', 'w') as f:
        json.dump(t, f, indent=4, sort_keys=True)
###

# Build the wine table: one row per result tuple, one named column per field
dataframe = pd.DataFrame(
    data=results,
    columns=[
        "Winery",
        "Year",
        "Wine ID",
        "Wine",
        "Rating",
        "num_review",
        "price",
        "acidity",
        "Blurb",
        "Body",
        "Body_des",
        "country",
        "wine_des",
        "food_1",
        "food_2",
        "food_3",
        "food_4",
        "grape_1",
        "grape_2",
        "grape_3",
        "acidity_score",
        "calculated_structure_count",
        "intensity",
        "sweetness",
        "tannin",
        "user_structure_count",
        "flavor_1",
        "flavor_1_count",
        "flavor_2",
        "flavor_2_count",
        "flavor_3",
        "flavor_3_count",
        "flavor_4",
        "flavor_4_count",
        "flavor_5",
        "flavor_5_count",
        "flavor_6",
        "flavor_6_count",
        "flavor_7",
        "flavor_7_count",
    ],
)

# Scraping the reviews from the Vivino website
ratings = []
# (translated from Dutch) "I added head(2) here for testing so that it only
# does two wines."  NOTE(review): no head(2) is present on the loop below,
# so this remark looks stale - the loop walks every wine in the table.
for _, row in dataframe.iterrows():
    page = 1
    while True:
        print(
            f'Getting info about wine {row["Wine ID"]}-{row["Year"]} Page {page}'
        )

        d = get_wine_data(row["Wine ID"], row["Year"], page)

        # An empty review list ends the paging for this wine.
        if not d["reviews"]:
            break

        # Named `review`, not `r`, so the explore response held in the
        # module-level `r` is not overwritten by the loop variable.
        for review in d["reviews"]:
            ratings.append(
                [
                    row["Year"],
                    row["Wine ID"],
                    review["rating"],
                    review["note"],
                    review["created_at"],
                ]
            )

        page += 1

# Turn the collected review rows into a table
ratings = pd.DataFrame(
    data=ratings,
    columns=["Year", "Wine ID", "User Rating", "Note", "CreatedAt"],
)

# Join every review with its wine's details (pandas merges on the columns
# the two tables have in common) and write the result to disk.
df_out = pd.merge(ratings, dataframe)
df_out.to_csv("data.csv", index=False)

Solution 1:^[1]

To get only English reviews, you can filter by the "language" key: English reviews have it set to "en".
To get wines from more countries, you can put the codes inside the "country_codes[]" key, for example ["pt", "es", "fr"].

# Import packages
import requests
import json
import pandas as pd

# Get request from the Vivino website
def get_wine_data(wine_id, year, page):
    """Fetch one page of reviews for a wine vintage from the Vivino API.

    Args:
        wine_id: Wine identifier, interpolated into the URL path.
        year: Vintage year, sent as the ``year`` query parameter.
        page: Page number, sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    }

    api_url = f"https://www.vivino.com/api/wines/{wine_id}/reviews?per_page=50&year={year}&page={page}"
    print(api_url)

    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(api_url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error body as data.
    response.raise_for_status()

    return response.json()


# Query the Vivino "explore" endpoint for the first page of matching wines
r = requests.get(
    "https://www.vivino.com/api/explore/explore",
    params={
        "country_codes[]": ["pt", "es", "fr"],  # <-- put more country codes here
        "currency_code": "EUR",
        "grape_filter": "varietal",
        "min_rating": "1",
        "order_by": "price",
        "order": "asc",
        "page": 1,
        "price_range_max": "500",
        "price_range_min": "0",
        "wine_type_ids[]": "1",
    },
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    },
    # Without a timeout a stalled connection would hang the script forever.
    timeout=30,
)
# Stop here on an HTTP error rather than failing later while parsing JSON.
r.raise_for_status()

# Pull the identifying fields (winery, year, wine ID, display name) out of
# every match returned by the explore request
results = [
    (
        vintage["wine"]["winery"]["name"],
        vintage["year"],
        vintage["wine"]["id"],
        f'{vintage["wine"]["name"]} {vintage["year"]}',
    )
    for vintage in (
        match["vintage"] for match in r.json()["explore_vintage"]["matches"]
    )
]

# Build the wine table: one row per result tuple
dataframe = pd.DataFrame(
    data=results,
    columns=[
        "Winery",
        "Year",
        "Wine ID",
        "Wine",
    ],
)

# Scraping the reviews from the Vivino website
ratings = []

for _, row in dataframe.iterrows():
    page = 1
    while True:
        print(
            f'Getting info about wine {row["Wine ID"]}-{row["Year"]} Page {page}'
        )

        d = get_wine_data(row["Wine ID"], row["Year"], page)

        # An empty review list ends the paging for this wine.
        if not d["reviews"]:
            break

        # Named `review`, not `r`, so the explore response held in the
        # module-level `r` is not overwritten by the loop variable.
        for review in d["reviews"]:
            if review["language"] != "en":  # <-- get only english reviews
                continue

            ratings.append(
                [
                    row["Year"],
                    row["Wine ID"],
                    review["rating"],
                    review["note"],
                    review["created_at"],
                ]
            )

        page += 1


# Turn the collected review rows into a table
ratings = pd.DataFrame(
    data=ratings,
    columns=["Year", "Wine ID", "User Rating", "Note", "CreatedAt"],
)

# Join every review with its wine's details (pandas merges on the columns
# the two tables have in common) and write the result to disk.
df_out = pd.merge(ratings, dataframe)
df_out.to_csv("data.csv", index=False)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1	DisappointedByUnaccountableMod

Scraping the English Vivino.com reviews from the website

Solution 1:[1]

Sources

Related Questions

Solution 1:^[1]