web: Auto turn over the page

mardi 19 janvier 2021

Auto turn over the page

I want to make Web scrapper.

The website is a job search site, "indeed"

The website consists of about 16pages.

and I want to scrap all pages.

But my code scrap only one page.

How can I solve it?

 import requests
 from bs4 import BeautifulSoup
 
 LIMIT = 50
 
 URL = F"https://kr.indeed.com/jobs?q=python&limit=50&radius=25"
 
 def get_last_page():
   result = requests.get(URL)
   soup = BeautifulSoup(result.text, "html.parser")
   pagination = soup.find("div", {"class":"pagination"})
   links = pagination.find_all('a')
   pages = []
   for link in links[:-1]:
     pages.append(int(link.string))
   max_page = pages[-1]
   return max_page
 
 def extract_job(html):
   title = html.find("h2", {"class": "title"}).find("a")["title"]
   company = html.find("span", {"class": "company"})
   company_anchor = company.find("a")
   if company_anchor is not None:
     company = str(company_anchor.string)
   else:
     company = str(company.string)
   company = company.strip()
   location = html.find("span", {"location"}).string
   job_id = html.find("h2",{"class":"title"}).find("a")["href"]
   return {'title' : title, 'company': company, 'location':location, "link": f"https://kr.indeed.com{job_id}"}
 
 def extract_jobs(last_page):
   print(f"Scrapping page {page}")
   jobs = []
   for page in range(last_page):
    result = requests.get(f"{URL}&start={page*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class":"jobsearch-SerpJobCard"})
    for result in results:
      job = extract_job(result)
      jobs.append(job)
    return jobs
 
 def get_jobs():
   last_page = get_last_page()
   jobs = extract_jobs(last_page)
   return jobs

plz help me

web

mardi 19 janvier 2021

Auto turn over the page

Aucun commentaire:

Enregistrer un commentaire