jeudi 24 janvier 2019

Web pages not loading/responding Python 3

When I run the code below I can't seem to get it to run threw without getting some sort of time out error or it seems to hang forever. Individually I can get them to work most times but only the CSI site seems to work consistently. Are their any changes I could make to have the script work not only better but simpler? Am I missing something? I'm new to Python and could use some help cleaning this up.

Thank you

import csv, os, time
import pandas as pd
import numpy as np

from selenium import webdriver
from pandas import DataFrame, read_csv, set_option
from matplotlib import pyplot
from datetime import date, datetime, timedelta
from collections import Counter
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook #Timer count
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from googlesearch import search

chrome_options      = webdriver.ChromeOptions()


start = time.time()
sleep_time          = 15

url_csi                 = 'http://www.csidata.com/factsheets.php?type=stock&format=html'
url_tmx                 = 'https://api.tmxmoney.com/en/migreport/search'
url_nyse                = 'https://www.nasdaq.com/screening/company-list.aspx'

database_csi            = "E:\\Stock Database\\Historical Data\\Historical Stock List\\CSI Historical Stock List\\"
database_tmx            = "E:\\Stock Database\\Historical Data\\Historical Stock List\\TMX Historical Stock List\\"
database_nyse           = "E:\\Stock Database\\Historical Data\\Historical Stock List\\NYSE Historical Stock List\\"
database_nasdaq         = "E:\\Stock Database\\Historical Data\\Historical Stock List\\NASDAQ Historical Stock List\\"
database_amex           = "E:\\Stock Database\\Historical Data\\Historical Stock List\\AMEX Historical Stock List\\"

master_file_csi         = "E:\\Stock Database\\Historical Data\\Historical Stock List\\CSI Historical Stock List\\CSI_Ticker_List_Historical.csv"
master_file_tmx         = "E:\\Stock Database\\Historical Data\\Historical Stock List\\TMX Historical Stock List\\TMX_Ticker_List_Historical.xlsx"
master_file_nyse        = "E:\\Stock Database\\Historical Data\\Historical Stock List\\NYSE Historical Stock List\\NYSE_Ticker_List_Historical.csv"
master_file_nasdaq      = "E:\\Stock Database\\Historical Data\\Historical Stock List\\NASDAQ Historical Stock List\\NASDAQ_Ticker_List_Historical.csv"
master_file_amex        = "E:\\Stock Database\\Historical Data\\Historical Stock List\\AMEX Historical Stock List\\AMEX_Ticker_List_Historical.csv"


#CSI Exchange Data Scraping

chrome_options      = webdriver.ChromeOptions()
prefs               = {'download.default_directory': database_csi}
chrome_options.add_experimental_option(name='prefs', value= prefs)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument('--dns-prefetch-disable')
chrome_driver              = webdriver.Chrome(r"E:\Python Programs\chromedriver", chrome_options = chrome_options)

if os.path.exists(master_file_csi): os.remove(master_file_csi)
#Website
chrome_driver.get(url_csi)
#Navigate Web Page
chrome_driver.find_element_by_css_selector('body > a:nth-child(3)').click()

time.sleep(sleep_time)

chrome_driver.close()

os.rename("%s"%database_csi+"stockfactsheet.csv","%s"%(master_file_csi))

#TMX Exchange Data Scraping
chrome_options      = webdriver.ChromeOptions()
prefs               = {'download.default_directory': database_tmx}
chrome_options.add_experimental_option(name='prefs', value= prefs)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument('--dns-prefetch-disable')
chrome_driver              = webdriver.Chrome(r"E:\Python Programs\chromedriver", chrome_options = chrome_options)

if os.path.exists(master_file_tmx): os.remove(master_file_tmx)
#Website
chrome_driver.get(url_tmx)
#Navigate Web Page
time.sleep(sleep_time)
chrome_driver.find_element_by_css_selector('#leftside > div.idt_container > form > input[type="submit"]:nth-child(3)').click()
#leftside > div.idt_container > form > input[type="submit"]:nth-child(3)
#Download Data
time.sleep(sleep_time)
chrome_driver.find_element_by_css_selector('#leftside > div.idt_containerResults > div.searchToolBox > div.idtDownload > form > input[type="submit"]:nth-child(8)').click()

time.sleep(sleep_time)

chrome_driver.close()

os.rename("%s"%database_tmx+"mig_report.xlsx","%s"%(master_file_tmx))


#NYSE Exchange Data Scraping
chrome_options      = webdriver.ChromeOptions()
prefs               = {'download.default_directory': database_nyse}
chrome_options.add_experimental_option(name='prefs', value= prefs)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument('--dns-prefetch-disable')
chrome_driver              = webdriver.Chrome(r"E:\Python Programs\chromedriver", chrome_options = chrome_options)

if os.path.exists(master_file_nyse): os.remove(master_file_nyse)
#Website
#Navigate Web Page
chrome_driver.get(url_nyse)
time.sleep(sleep_time)
chrome_driver.find_element_by_css_selector('#companyListDownloads > table > tbody > tr:nth-child(2) > td:nth-child(2) > a').click()

time.sleep(sleep_time)

chrome_driver.close()

os.rename("%s"%database_nyse+"companylist.csv","%s"%(master_file_nyse))


#NASDAQ Exchange Data Scraping
chrome_options      = webdriver.ChromeOptions()
prefs               = {'download.default_directory': database_nasdaq}
chrome_options.add_experimental_option(name='prefs', value= prefs)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument('--dns-prefetch-disable')
chrome_driver              = webdriver.Chrome(r"E:\Python Programs\chromedriver", chrome_options = chrome_options)

if os.path.exists(master_file_nasdaq): os.remove(master_file_nasdaq)
#Website
chrome_driver.get(url_nyse)
time.sleep(sleep_time)
#Navigate Web Page
chrome_driver.find_element_by_css_selector('#companyListDownloads > table > tbody > tr:nth-child(1) > td:nth-child(2) > a > div > svg').click()

time.sleep(sleep_time)

chrome_driver.close()

os.rename("%s"%database_nasdaq+"companylist.csv","%s"%(master_file_nasdaq))


#AMEX Exchange Data Scraping
chrome_options      = webdriver.ChromeOptions()
prefs               = {'download.default_directory': database_amex}
chrome_options.add_experimental_option(name='prefs', value= prefs)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument('--dns-prefetch-disable')
chrome_driver              = webdriver.Chrome(r"E:\Python Programs\chromedriver", chrome_options = chrome_options)

if os.path.exists(master_file_amex): os.remove(master_file_amex)
#Website
chrome_driver.get(url_nyse)
time.sleep(sleep_time)
#Navigate Web Page
chrome_driver.find_element_by_css_selector('#companyListDownloads > table > tbody > tr:nth-child(1) > td:nth-child(2) > a > div > svg').click()

time.sleep(sleep_time)

chrome_driver.close()

os.rename("%s"%database_amex+"companylist.csv","%s"%(master_file_amex))




Aucun commentaire:

Enregistrer un commentaire