I'm trying to extract the information on each twitter like but it only returns the wrong number of likes or none at all. I'm pretty sure my code is correct. I believe it might be because of the fact that Twitter is trying to prevent people from web scrape information from their site. Is there a way to fix this? Also is there a way to see everyone that has liked a specific tweet?
import re
import requests
import urllib
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
from bs4 import BeautifulSoup
import sys
import unittest, time
import openpyxl
url = ["https://twitter.com/CocaCola?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor"]
for x in url:
d = webdriver.Chrome()
actions = ActionChains(d)
d.get(x)
res = requests.get(x)
page = urllib.urlopen(x)
numb = 0;
SCROLL_PAUSE_TIME = 0.5
# Get scroll height
last_height = d.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(SCROLL_PAUSE_TIME)
# Calculate new scroll height and compare with last scroll height
new_height = d.execute_script("return document.body.scrollHeight")
soup = BeautifulSoup(page, "html.parser")
for posts in soup.findAll('div',{"class":"content"}):
if(posts.find('p').text.encode('utf-8').find("Retweeted") == -1):
print(posts.find('span',{"class": "_timestamp js-short-timestamp"}).text)
print(posts.find('p').text.encode('utf-8'))
retweet = posts.find('button',{"class": "ProfileTweet-actionButton js-actionButton js-actionFavorite"})
#print(retweet.find('span',{"class":"ProfileTweet-actionCount"})["data-tweet-stat-count"])
print(retweet)
likes = posts.find('div',{"class":"ProfileTweet-action ProfileTweet-action--favorite js-toggleState"})
print(likes.find('span',{"class": "ProfileTweet-actionCountForPresentation"}))
numb = numb+1
if new_height == last_height:
break
if numb > 1:
break
if numb > 1:
break
last_height = new_height
d.close()
Aucun commentaire:
Enregistrer un commentaire