I am writing a scraper for the wellness.com site. Please tell me how to parse the data from the "Reviews" and "Phone Numbers & Directions" tabs. I have already managed to get the data from the "Profile" tab.
My scraper wellness.py:
import scrapy
class Wellness(scrapy.Spider):
    """Spider that crawls from the wellness.com ``/find`` page to profile pages.

    Callback chain: ``parse`` -> ``state`` -> ``city`` -> ``profile_url`` ->
    ``profile``; each profile then schedules its Reviews and Directions tabs,
    handled by ``reviews`` and ``directions``.
    """

    name = "wellness"
    start_urls = ['https://www.wellness.com/find']

    def parse(self, response):
        """Follow the category links at positions 5 and 6 of the start page."""
        for a in response.css("li.categories-li a")[5:7]:
            yield response.follow(a, callback=self.state)

    def state(self, response):
        """Follow the first three links found in ``div.find-item-container``."""
        for a in response.css("div.find-item-container a")[0:3]:
            yield response.follow(a, callback=self.city)

    def city(self, response):
        """Follow every ``li.categories-li`` link to a listing page."""
        for a in response.css("li.categories-li a"):
            yield response.follow(a, callback=self.profile_url)

    def profile_url(self, response):
        """Follow each profile link on a listing page, then the next page."""
        for a in response.css("h2 a"):
            yield response.follow(a, callback=self.profile)

        # response.css() returns a (possibly empty) selector list, never None,
        # and response.follow() does not accept a selector list. Extract the
        # href string instead and follow it only when a next page exists.
        next_page = response.css("li.pagination-next a::attr(href)").get()
        if next_page:
            yield response.follow(next_page, callback=self.profile_url)

    def profile(self, response):
        """Yield the profile item, then request the Reviews/Directions tabs."""
        services = response.xpath('.//span[contains(text(),"Services")]')
        education = response.xpath('.//span[contains(text(),"Education")]')
        training = response.xpath('.//span[contains(text(),"Training")]')
        yield {
            'First and Last name': response.css('h1::text').get(),
            'About': response.css('.listing-about::text').get(),
            'Services': services.xpath('following-sibling::span[1]/text()').extract(),
            'Primary Specialty': response.css('.normal::text').get(),
            'Address': ' '.join(
                i.strip()
                for i in response.css('.office-address span::text').getall()
            ),
            'Practice': response.css('.years-in-service::text').get(),
            'Education': education.xpath('following-sibling::span[1]/text()').extract(),
            'Training': training.xpath('following-sibling::span[1]/text()').extract(),
            'Consumer Feedback': response.css('.item-rating-container a::text').get(),
        }

        # The original guard tested ``next_page``, a name that does not exist
        # in this method, so a NameError stopped the callback before either
        # tab was requested. Each tab is now guarded by its own href.
        reviews_tab = response.css("#reviews_tab a::attr(href)").get()
        if reviews_tab:
            yield response.follow(reviews_tab, callback=self.reviews)

        directions_tab = response.css("#directions_tab a::attr(href)").get()
        if directions_tab:
            yield response.follow(directions_tab, callback=self.directions)

    def reviews(self, response):
        """Yield the first ``.listing-review-text`` text node of the tab."""
        yield {
            'Rewiew': response.css('.listing-review-text::text').get(),
        }

    def directions(self, response):
        """Yield the first ``.directions-number`` text node of the tab."""
        yield {
            'Number': response.css('.directions-number::text').get(),
        }
Thank you in advance.
Aucun commentaire:
Enregistrer un commentaire