python - HTML in the inspect element is different from the one displayed on screen - Stack Overflow
I am trying to scrape the data from this website: .aspx?Game=2009_1211_2563_2684-Lebanon
The website contains two tables, but the data displayed in the table rows is different from the data in the HTML source (as seen with Inspect Element).
For example, this is the data for the first row:
<tr class="my_pStats1" onmouseover="this.style.backgroundColor='#C3C3C3';" onmouseout="this.style.backgroundColor='#FFFFFF';" valign="center" height="17" style="background-color: rgb(255, 255, 255);">
<td class="headcol"> </td>
<td class="headcol2 my_playerName" align="left"><a class="my_playerB" href=";><font color="#0066cc">SMdRl-XIuQ, zRij</font></a></td>
<td>45</td>
<td>4-9 (38.7%)</td>
<td>0-9 (96.3%)</td>
<td>5-5 (5%)</td>
<td class="hiddensmall">5</td>
<td class="hiddensmall">6</td>
<td>6</td>
<td>1</td>
<td>6</td>
<td class="hiddensmall">5</td>
<td class="hiddensmall">5</td>
<td class="hiddensmall">6</td>
<td class="hiddensmall">5</td>
<td class="hiddensmall">8</td>
<td>86</td>
<td class="hiddensmall">5</td>
<td class="hiddensmall">5</td></tr>
But the name of the player displayed on screen is Jean Abdel-Nour, not SMdRl-XIuQ, zRij, and the numbers differ in a similar way.
I tried Selenium, but it didn't work:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Load a box-score page in Chrome and return its player stats as a DataFrame.

    The page is rendered with Selenium, parsed with BeautifulSoup, and the
    player rows of the first two ``div.dvbs`` tables are combined into a
    single table with two extra columns, ``Team`` and ``Opponent``.

    Args:
        url: Address of the box-score page to open.

    Returns:
        A ``pandas.DataFrame`` whose columns are the second header row of the
        first stats table followed by ``Team`` and ``Opponent``. Rows whose
        cell count does not match the header count are left out.

    Raises:
        AttributeError: If an expected element (``table#aannew``, ``thead``,
            ``tbody`` or the team link) is missing from the page.
        IndexError: If the page has fewer than two ``table#aannew`` elements,
            fewer than two ``div.dvbs`` elements, or fewer than two header rows.
    """
    # Fetch the rendered page. The browser is closed in ``finally`` so a
    # failed navigation does not leave a Chrome process running.
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        driver.quit()
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract team and opponent names
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    # Extract stats for both teams
    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Append the Team/Opponent values; keep only rows that line up with the
    # headers so the DataFrame constructor does not fail on ragged rows.
    num_columns = len(headers)
    team_stats = [
        row + [team, opponent]
        for row in team_stats
        if len(row) + 2 == num_columns
    ]
    opponent_stats = [
        row + [opponent, team]
        for row in opponent_stats
        if len(row) + 2 == num_columns
    ]

    return pd.DataFrame(team_stats + opponent_stats, columns=headers)
# NOTE(review): as shown here the URL has no scheme or host; presumably the
# site prefix was lost when the page was copied -- supply the full URL.
url = ".aspx?Game=2009_1211_2563_2684-Lebanon"
# Build the combined box-score DataFrame for that game page.
df = extract_box_score_from_url(url)
# Bare expression: presumably meant to display the frame in a notebook/REPL.
df
Can you please help me find a way to scrape this data? I tried Selenium:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
def extract_box_score_from_url(url):
    """Load a box-score page in Chrome and return its player stats as a DataFrame.

    The page is rendered with Selenium, parsed with BeautifulSoup, and the
    player rows of the first two ``div.dvbs`` tables are combined into a
    single table with two extra columns, ``Team`` and ``Opponent``.

    Args:
        url: Address of the box-score page to open.

    Returns:
        A ``pandas.DataFrame`` whose columns are the second header row of the
        first stats table followed by ``Team`` and ``Opponent``. Rows whose
        cell count does not match the header count are left out.

    Raises:
        AttributeError: If an expected element (``table#aannew``, ``thead``,
            ``tbody`` or the team link) is missing from the page.
        IndexError: If the page has fewer than two ``table#aannew`` elements,
            fewer than two ``div.dvbs`` elements, or fewer than two header rows.
    """
    # Fetch the rendered page. The browser is closed in ``finally`` so a
    # failed navigation does not leave a Chrome process running.
    driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
    try:
        driver.get(url)
        html_content = driver.page_source
    finally:
        driver.quit()
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract team and opponent names
    team = soup.find('table', {'id': 'aannew'}).find('a').text.strip()
    opponent = soup.find_all('table', {'id': 'aannew'})[1].find('a').text.strip()

    # Column names come from the second header row of the first stats table.
    stats_divs = soup.find_all('div', class_='dvbs')
    header_rows = stats_divs[0].find('thead').find_all('tr')
    headers = [th.get_text(strip=True) for th in header_rows[1].find_all('th')]
    headers += ['Team', 'Opponent']

    def extract_team_stats(dvbs):
        """Return one list of stripped cell texts per player row in *dvbs*."""
        rows = dvbs.find('tbody').find_all('tr', class_=['my_pStats1', 'my_pStats2'])
        return [
            [col.get_text(strip=True) for col in row.find_all('td')]
            for row in rows
        ]

    # Extract stats for both teams
    team_stats = extract_team_stats(stats_divs[0])
    opponent_stats = extract_team_stats(stats_divs[1])

    # Append the Team/Opponent values; keep only rows that line up with the
    # headers so the DataFrame constructor does not fail on ragged rows.
    num_columns = len(headers)
    team_stats = [
        row + [team, opponent]
        for row in team_stats
        if len(row) + 2 == num_columns
    ]
    opponent_stats = [
        row + [opponent, team]
        for row in opponent_stats
        if len(row) + 2 == num_columns
    ]

    return pd.DataFrame(team_stats + opponent_stats, columns=headers)
# NOTE(review): as shown here the URL has no scheme or host; presumably the
# site prefix was lost when the page was copied -- supply the full URL.
url = ".aspx?Game=2009_1211_2563_2684-Lebanon"
# Build the combined box-score DataFrame for that game page.
df = extract_box_score_from_url(url)
# Bare expression: presumably meant to display the frame in a notebook/REPL.
df
最新文章
- 谷歌回应欧盟反垄断指控:安卓利于竞争和消费者
- 传言成真:微软宣布以72亿美元收购诺基亚手机部门
- 手机防盗软件拍下“嫌疑人”可当呈堂证供
- [连载]巨头“心血之作”终失败(四):intel Larrabee图形芯片
- python - tensorflow-gpu installation failed in colab - Stack Overflow
- python - How to solve the problem of multi-layer contours obtained by using Zernick moments for sub-pixel edge detection? - Stac
- c# - TrackableBehaviour.Status type is missing when using Vuforia - Stack Overflow
- linux - Zowe Config Error popping up on VSCODE? How may I fix this error? - Stack Overflow
- testrigor - Unable to capture values from card display - Stack Overflow
- nim lang - How to parse a JSON file in Nim using the standard library? - Stack Overflow
- python - Unable to Fetch Data from PostgreSQL on Databricks - Connection Attempt Failed - Stack Overflow
- database - MongoDB total keys examined when multiple documents have the same key value - Stack Overflow
- javascript - Three Contenteditable Divs that connect as one larger text box with maxChars of 274 - Stack Overflow
- docker - Python Telegram bot freezes when run by Airflow - Stack Overflow
- c# - What could be wrong with the Import method on the oSets object here? - Stack Overflow
- caching - How is the userUX affected by cache purge by a WordPress plugin? Cookie consent settings is not affected? - Stack Over
- flutter - Exported Excel File is Blank When Using excel Package - Stack Overflow