lundi 11 avril 2016

How to get data from web page generated by JavaScript using C# WebBrowser

I am trying to use C#'s WebBrowser to scrape revenue and earnings data from Nasdaq website. The data is rendered in the page with JavaScript. I ran the following C# console code and saved the page content to a file called goog.html, and then opened the file with Firefox. No desired revenue and earnings data were collected.

Please, find the final goog.html file opened by Firefox.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Windows.Forms;

namespace CSharpTest
{
    class testweb1
    {
        public const string TestUrl = "http://ift.tt/1oR2PDP";

        [STAThread]
        static void Main(string[] args)
        {
            WebBrowser wb = new WebBrowser();
            wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(wb_DocumentCompleted);
            wb.Navigate(TestUrl);

            while (wb.ReadyState != WebBrowserReadyState.Complete)
            {
                Application.DoEvents();
            }

            Console.WriteLine("\nPress any key to continue...");
            Console.ReadKey(true);
        }

        static void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            WebBrowser wb = (WebBrowser)sender;

            HtmlElement document = wb.Document.GetElementsByTagName("html")[0];

            using (StreamWriter sw = new StreamWriter("E:\\temp\\goog.html")) {
                sw.Write(document.OuterHtml);
            }
        }
    }
}

enter image description here

enter image description here




Aucun commentaire:

Enregistrer un commentaire