lundi 3 septembre 2018

Checking if a link exists in Python3

Can somebody please help ammend my code? I am reading URL paths from a file and checking if they exist. I want the program to try to connect to the target URL and if successful print some output to let the user know the link exists.

from bs4 import BeautifulSoup
import socket
import requests
import time
from urllib import request

# Create a connection, (starting point)
def scrape_links(a_link):

    links = []

    #time.sleep(1)
    headers = {'User-Agent': 'Mozilla/5.0'}
    r = requests.get(a_link, headers=headers)
    bs_obj = BeautifulSoup(r.text, 'html.parser')
    #print(bs_obj)



    for link in bs_obj.find_all('a'):
            url = (link.get('href'))
            try:
                if url[0:7] == 'http://' or url[0:8] == 'https://' and not url in links:
                    #print(url)
                    links += [url]
            except:
                #log = open('errorlog.txt','a+')
                #log.write('An error occurred in the scrape_links function')
                #log.close()
                continue
        return links

def hunt_link(a_link):
    f = open('link.txt','r')
    string = '\nSearching for url for ' + a_link
    print(string)
    i = f.readline()
    for i in f:
        time.sleep(0.5)
        url = 'http://' + a_link + '/' + i
        print(url)
        try:
            request = urllib.request.urlopen(url)
            if request == True:
                print('[*]Found @:',url)
                return url
        except:
            continue




Aucun commentaire:

Enregistrer un commentaire