jeudi 6 juin 2019

Web scraping with getElementsByTagName()

I want to import restaurant data such as the restaurant name, phone number, website, and address into Excel, but unfortunately I am getting ads and garbage data. I wrote the code below by following http://automatetheweb.net/vba-getelementsbytagname-method/, but it does not give the expected result. Please help me fix the issue in my code. Website: https://www.yellowpages.com/atlanta-ga/attorneys
Please do not suggest a JSON-based approach, as it does not work on other websites.

' Scrapes result pages FIRST_PAGE..LAST_PAGE of the listing at BASE_URL with
' Internet Explorer and writes one worksheet row per element of class "info":
'   column A - text of the element's first child
'   column B - href of the first <a> inside the element
'   column C - text of the element's third child
'   column D - href of the first <a href> inside the third child
' Missing pieces are written as empty strings instead of raising an error.
' Elements whose first child has no text are skipped.
' The browser is always closed, even when an error interrupts the run; the
' error is then re-raised to the caller.
'
' NOTE(review): class "info" may also match sponsored listings. Excluding them
' requires a narrower selector taken from the live page markup - confirm
' against the site before relying on the output.
Sub Yellowcom()
    Const BASE_URL As String = "https://www.yellowpages.com/atlanta-ga/attorneys"
    Const FIRST_PAGE As Long = 1
    Const LAST_PAGE As Long = 4
    Const READYSTATE_COMPLETE As Long = 4

    ' Everything from the browser is late-bound (Object) so that members living
    ' on different element interfaces are resolved at run time.
    Dim IE As Object
    Dim doc As Object
    Dim listings As Object
    Dim listing As Object
    Dim detailCell As Object
    Dim anchors As Object

    Dim ws As Object
    Dim pageUrl As String
    Dim nameText As String
    Dim firstLink As String
    Dim page As Long
    Dim rowIndex As Long          ' Long: Integer overflows after row 32767
    Dim failNumber As Long
    Dim failText As String

    ' Capture the target sheet once; DoEvents below lets the user switch sheets.
    Set ws = ActiveSheet
    rowIndex = 1

    Set IE = CreateObject("InternetExplorer.Application")
    On Error GoTo Failed
    IE.Visible = True

    For page = FIRST_PAGE To LAST_PAGE

        ' The first page has no query string; later pages use ?page=N.
        If page > 1 Then
            pageUrl = BASE_URL & "?page=" & page
        Else
            pageUrl = BASE_URL
        End If

        IE.navigate pageUrl

        ' Wait until the browser is idle AND the document is complete. Checking
        ' readyState alone can pass immediately on the previous page's state.
        Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop

        Set doc = IE.document
        Set listings = doc.getElementsByClassName("info")

        For Each listing In listings

            nameText = YellowcomText(YellowcomChild(listing, 0))

            If Len(nameText) > 0 Then
                Set detailCell = YellowcomChild(listing, 2)

                firstLink = vbNullString
                Set anchors = listing.getElementsByTagName("a")
                If anchors.Length > 0 Then firstLink = YellowcomHref(anchors.Item(0))

                ws.Range("A" & rowIndex).Value = nameText
                ws.Range("B" & rowIndex).Value = firstLink
                ws.Range("C" & rowIndex).Value = YellowcomText(detailCell)
                If detailCell Is Nothing Then
                    ws.Range("D" & rowIndex).Value = vbNullString
                Else
                    ' Store the link target, not the element object itself.
                    ws.Range("D" & rowIndex).Value = _
                        YellowcomHref(detailCell.querySelector("a[href]"))
                End If

                rowIndex = rowIndex + 1
            End If

        Next listing

    Next page

Finish:
    ' Close the browser on both the normal and the error path.
    On Error Resume Next
    IE.Quit
    Set IE = Nothing
    On Error GoTo 0
    If failNumber <> 0 Then Err.Raise failNumber, "Yellowcom", failText
    Exit Sub

Failed:
    ' Remember the error, then leave the handler so cleanup can run safely.
    failNumber = Err.Number
    failText = Err.Description
    Resume Finish
End Sub

' Returns the child element at the zero-based position, or Nothing when the
' parent is Nothing or has too few children.
Private Function YellowcomChild(ByVal parent As Object, ByVal position As Long) As Object
    If parent Is Nothing Then Exit Function
    If position < parent.Children.Length Then
        Set YellowcomChild = parent.Children(position)
    End If
End Function

' Returns the trimmed text content of a node, or "" when the node is Nothing.
Private Function YellowcomText(ByVal node As Object) As String
    ' Appending vbNullString turns a Null textContent into an empty string.
    If Not node Is Nothing Then YellowcomText = Trim$(node.textContent & vbNullString)
End Function

' Returns the href of an anchor element, or "" when the anchor is Nothing.
Private Function YellowcomHref(ByVal anchor As Object) As String
    If Not anchor Is Nothing Then YellowcomHref = anchor.href & vbNullString
End Function




Aucun commentaire:

Enregistrer un commentaire