jeudi 18 avril 2019

Web Scraping by Elements

I'm trying to crap the data from web-page, for similar web page I raised a question previously and it was answered by QHarr which was really helpful and it works fine. But now it won't work for a web-site for which all the format is same like as class, tag everything. I am getting error is "Subscript ot of range" and it's highlighting on "ReDim results(1 To rowCount, 1 To numColumns)" code.

I got answer on page: Web Scraping by TagName the code works fine for: https://www.neighborhoodselfstorage.net/self-storage-ocean-city-md-88769

Now I am trying to use the same code for: https://www.stormore.net/self-storage-seattle-wa-101616#utm_source=GoogleLocal&utm_medium=WRLocal&utm_campaign=101616

Please anybody help to solve this problem.

Option Explicit  
Public Sub GetInfo()
Dim ws As Worksheet, html As HTMLDocument, s As String
Const URL As String = "https://www.stormore.net/self-storage-seattle-wa-101616#utm_source=GoogleLocal&utm_medium=WRLocal&utm_campaign=101616"

Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
    .Open "GET", URL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .send
    s = .responseText
    html.body.innerHTML = s

    Dim headers(), results(), listings As Object, amenities As String

    headers = Array("Size", "Description", "Amenities", "Offer1", "Offer2", "RateType", "Price")
    Set listings = html.querySelectorAll(".main li[class]")

    Dim rowCount As Long, numColumns As Long, r As Long, c As Long
    Dim icons As Object, icon As Long, amenitiesInfo(), i As Long, item As Long

    rowCount = listings.Length
    numColumns = UBound(headers) + 1

    ReDim results(1 To rowCount, 1 To numColumns)
    Dim html2 As HTMLDocument
    Set html2 = New HTMLDocument
    For item = 0 To listings.Length - 1
        r = r + 1
        html2.body.innerHTML = listings.item(item).innerHTML
        'size,description, amenities,specials offer1 offer2, rate type, price

        results(r, 1) = Trim$(html2.querySelector(".size").innerText)
        results(r, 2) = Trim$(html.querySelector(".description").innerText)
        Set icons = html2.querySelectorAll("i[title]")

        ReDim amenitiesInfo(0 To icons.Length - 1)

        For icon = 0 To icons.Length - 1
            amenitiesInfo(icon) = icons.item(icon).getAttribute("title")
        Next

        amenities = Join$(amenitiesInfo, ", ")

        results(r, 3) = amenities
        results(r, 4) = html2.querySelector(".offer1").innerText
        results(r, 5) = html2.querySelector(".offer2").innerText
        results(r, 6) = html2.querySelector(".rate-label").innerText
        results(r, 7) = html2.querySelector(".price").innerText
    Next

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub




Aucun commentaire:

Enregistrer un commentaire