I want to do web scraping with IP rotation. I wrote the following code using stem and Tor:
from stem import Signal
from stem.control import Controller
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
# signal TOR for a new connection
def switchIP():
    """Ask the local Tor process for a new identity (fresh circuits).

    Connects to the Tor control port on 9051, authenticates with the
    controller's default credentials, sends ``NEWNYM`` and then blocks
    until Tor would accept another ``NEWNYM``.

    Raises:
        Whatever stem raises when the control port is unreachable or
        authentication fails; nothing is caught here.
    """
    import time  # local import so the file's import header stays untouched

    with Controller.from_port(port=9051) as controller:
        controller.authenticate()
        controller.signal(Signal.NEWNYM)
        # Tor rate-limits NEWNYM: a second signal sent too soon is not
        # honoured, so the next request would reuse the same exit IP.
        # stem tracks when this controller last sent NEWNYM and reports how
        # long to wait before the next one would be respected.
        time.sleep(controller.get_newnym_wait())
# get a new selenium webdriver with tor as the proxy
def my_proxy(PROXY_HOST, PROXY_PORT):
    """Create a Firefox webdriver that routes traffic through a SOCKS proxy.

    Args:
        PROXY_HOST: Host name or IP address of the SOCKS proxy.
        PROXY_PORT: Port of the SOCKS proxy; converted with ``int()``.

    Returns:
        A new ``webdriver.Firefox`` instance using the configured profile.
        The caller is responsible for calling ``quit()`` on it.
    """
    profile = webdriver.FirefoxProfile()
    # Direct = 0, Manual = 1, PAC = 2, AUTODETECT = 4, SYSTEM = 5
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.socks", PROXY_HOST)
    profile.set_preference("network.proxy.socks_port", int(PROXY_PORT))
    # Pin the protocol explicitly instead of relying on the browser default.
    profile.set_preference("network.proxy.socks_version", 5)
    # Resolve host names through the proxy as well; otherwise DNS lookups
    # are made directly from this machine and bypass the proxy.
    profile.set_preference("network.proxy.socks_remote_dns", True)
    profile.update_preferences()
    return webdriver.Firefox(firefox_profile=profile)
# sends a request to https://whatsmyip.com/ so that we can check the IP of our request through our selenium webdriver
# Fetch the IP-echo page ten times, asking Tor for a new identity after
# each request so that consecutive requests can leave from different exits.
#
# NOTE(review): a "proxyConnectFailure" error page means nothing accepted
# the connection at 127.0.0.1:9050. Check that the Tor process is running
# and that its SocksPort matches this value -- presumably 9050 for a
# standalone tor daemon; the Tor Browser bundle is commonly on 9150. Confirm
# against the local torrc.
for _ in range(10):
    driver = my_proxy("127.0.0.1", 9050)
    try:
        driver.get("https://whatsmyip.com/")
        soup = BeautifulSoup(driver.page_source, 'lxml')
        print(soup.find("span", {"id": "ipv4"}))
        print(soup.find("span", {"id": "ipv6"}))
    finally:
        # Each iteration launches a fresh browser; quit it even when the
        # page load fails so Firefox processes do not pile up.
        driver.quit()
    switchIP()
I have already updated the torrc file by adding these two lines:
ControlPort 9051
CookieAuthentication 1
When I run the code, it raises an error:
WebDriverException: Message: Reached error page: about:neterror?e=proxyConnectFailure&u=https%3A//whatsmyip.com/&c=UTF-8&f=regular&d=Firefox%20is%20configured%20to%20use%20a%20proxy%20server%20that%20is%20refusing%20connections.
Aucun commentaire:
Enregistrer un commentaire