For excerise I want to make my own Web Crawler but I have a problem with recurrent invocation of my crawl method. It should start for every link in my links array and goes so on until I decide to abort whole program but it only goes for first element in that array so it simply goes back and forth without any progress. How can I fix this?
Crawler.java
package regularmikey.mikecrawler;
import java.io.IOException;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Crawler implements Runnable {
private Elements links;
private Document doc;
private String start_url;
public Crawler(){};
public Crawler(String url){start_url = url;};
public void crawl(String url) {
try {
System.out.println(url);
doc = Jsoup.connect(url).get();
String title = doc.title();
System.out.println("title : " + title);
links = doc.select("a[href]");
for (Element link : links) {
if(AdressValidator.validAddress(link.attr("href"))) {
crawl(link.attr("href"));
}
}
} catch (org.jsoup.UnsupportedMimeTypeException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public void run() {
crawl(start_url);
}
}
App.java
package regularmikey.mikecrawler;
public class App
{
public static void main( String[] args )
{
Thread thread = new Thread(new Crawler("http://facebook.com"));
thread.run();
}
}
Aucun commentaire:
Enregistrer un commentaire