vendredi 31 janvier 2020

How to remove accents to webscrape name using python

i have a list of names, but some have accents. i want to be able to find the page of the person without having to manually get rid of the accent on the name, which prevents the search. is there a way to even do this?

import requests
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame

base_url = 'https://basketball.realgm.com'

player_names=['Ante Žižić','Anžejs Pasečņiks', 'Dario Šarić', 'Dāvis Bertāns', 'Jakob Pöltl']

# Empty DataFrame
result = pd.DataFrame()

for name in player_names:
    url = f'{base_url}/search?q={name.replace(" ", "+")}' 
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    if url == response.url:
        # Get all NBA players
        for player in soup.select('.tablesaw tr:has(a[href*="/nba/teams/"]) a[href*="/player/"]'): 
            response = requests.get(base_url + player['href'])
            player_soup = BeautifulSoup(response.content, 'lxml') 
            player_data = get_player_stats(search_name=player.text, real_name=name, player_soup=player_soup) 
            result = result.append(player_data, sort=False).reset_index(drop=True)
    else:
        player_data = get_player_stats(search_name=name, real_name=name, player_soup=soup) 
        result = result.append(player_data, sort=False).reset_index(drop=True) 



Aucun commentaire:

Enregistrer un commentaire