I am trying to use C#'s WebBrowser control to scrape revenue and earnings data from the Nasdaq website. The data is rendered on the page by JavaScript. I ran the following C# console program, which saves the page content to a file called goog.html, and then opened that file in Firefox. The saved file did not contain the revenue and earnings data I was looking for.
Please find the final goog.html file as opened in Firefox.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Windows.Forms;
namespace CSharpTest
{
/// <summary>
/// Console test harness that loads <see cref="TestUrl"/> in a WinForms
/// <see cref="WebBrowser"/> control and saves the resulting DOM as an HTML file.
/// </summary>
/// <remarks>
/// NOTE(review): the WebBrowser control hosts the installed Internet Explorer engine,
/// which by default may run in a legacy document mode; if the page's scripts still do
/// not render, check the FEATURE_BROWSER_EMULATION setting for this executable.
/// </remarks>
class testweb1
{
    public const string TestUrl = "http://ift.tt/1oR2PDP";

    // Destination of the saved snapshot (same path the original code wrote to).
    private const string OutputPath = "E:\\temp\\goog.html";

    // How long to keep pumping messages after the document reports Complete so that
    // asynchronous scripts get a chance to populate the DOM before the final snapshot.
    private const int ScriptGracePeriodMs = 5000;

    // Pause between message-pump iterations so the wait loops do not spin a CPU core.
    private const int PumpIntervalMs = 10;

    /// <summary>
    /// Navigates to <see cref="TestUrl"/>, waits for the page (and its scripts) to
    /// settle, saves the DOM to disk, and then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    [STAThread]
    static void Main(string[] args)
    {
        // WebBrowser wraps an ActiveX control and must be disposed.
        using (WebBrowser wb = new WebBrowser())
        {
            // Script-error dialogs are modal and would stall an unattended run.
            wb.ScriptErrorsSuppressed = true;
            wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(wb_DocumentCompleted);
            wb.Navigate(TestUrl);

            // There is no Application.Run here, so pump messages manually until loaded.
            while (wb.ReadyState != WebBrowserReadyState.Complete)
            {
                Application.DoEvents();
                System.Threading.Thread.Sleep(PumpIntervalMs);
            }

            // ReadyState.Complete only means the static document has loaded; data that
            // the page injects with JavaScript arrives later. Keep pumping for a grace
            // period, then take the final snapshot.
            System.Diagnostics.Stopwatch grace = System.Diagnostics.Stopwatch.StartNew();
            while (grace.ElapsedMilliseconds < ScriptGracePeriodMs)
            {
                Application.DoEvents();
                System.Threading.Thread.Sleep(PumpIntervalMs);
            }

            // Overwrites the early snapshot taken in wb_DocumentCompleted.
            SaveDocument(wb);
        }

        Console.WriteLine("\nPress any key to continue...");
        Console.ReadKey(true);
    }

    /// <summary>
    /// Saves an early snapshot of the page when the top-level document finishes loading.
    /// </summary>
    /// <param name="sender">The <see cref="WebBrowser"/> that raised the event.</param>
    /// <param name="e">Event data; <c>e.Url</c> identifies the document that completed.</param>
    static void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        WebBrowser wb = (WebBrowser)sender;

        // DocumentCompleted is raised once per frame; only react to the top-level page.
        if (wb.Url != null && !wb.Url.Equals(e.Url))
        {
            return;
        }

        SaveDocument(wb);
    }

    /// <summary>
    /// Writes the outer HTML of the browser's current <c>&lt;html&gt;</c> element to
    /// <see cref="OutputPath"/>. Does nothing if no document or root element is available.
    /// </summary>
    /// <param name="wb">The browser whose current document is saved.</param>
    private static void SaveDocument(WebBrowser wb)
    {
        HtmlDocument page = wb.Document;
        if (page == null)
        {
            return;
        }

        HtmlElementCollection roots = page.GetElementsByTagName("html");
        if (roots.Count == 0)
        {
            return;
        }

        // Make sure the target folder exists so StreamWriter does not throw.
        string folder = Path.GetDirectoryName(OutputPath);
        if (!string.IsNullOrEmpty(folder))
        {
            Directory.CreateDirectory(folder);
        }

        using (StreamWriter sw = new StreamWriter(OutputPath))
        {
            sw.Write(roots[0].OuterHtml);
        }
    }
}
}
Aucun commentaire:
Enregistrer un commentaire