I have a script to scrape webpages from URLs listed in a txt file. The file contains almost 150 URLs. I open the file, read the URLs line by line in a while loop, and call the scrape function on each URL. The problem is that the script gets stuck on some URL, and none of the URLs after that one get scraped. Is there any solution to scrape all the webpages without this problem? I am using Simple HTML DOM Parser to extract the data. Below is the code from my script; _CURL is the function that fetches the webpage:
$handle = fopen("liste-chalet.txt", "r");
if ($handle) {
    while (($line = fgets($handle)) !== false) {
        $line = str_replace("\r\n", "", $line);
        $data = _CURL($line);
    }
    fclose($handle);
}
The _CURL function:
function _CURL($url = false, $post_data = array()) {
    if ($url == false) {
        return false;
    }
    $user_agent = @$_SERVER['HTTP_USER_AGENT'];
    if (empty($user_agent)) {
        $user_agent = browserAgent();
    }
    $ch = curl_init();
    // curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    // curl_setopt($ch, CURLOPT_TIMEOUT, 25);
    curl_setopt($ch, CURLOPT_ENCODING, 1);
    // curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_POST, 1);
    if (!empty($post_data)) {
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_data));
    }
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    $data = curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    //echo $httpcode;die;
    echo curl_error($ch);
    curl_close($ch);
    if ($httpcode >= 200 && $httpcode < 300) {
        return $data;
    } else {
        return false;
    }
}
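For reference, the most likely reason the loop stalls is that the two timeout options are commented out, so curl_exec() can wait indefinitely on a server that never responds. Below is a minimal sketch with the timeouts re-enabled; fetchPage() is a hypothetical simplified variant of _CURL, and the 5- and 25-second values are only assumptions taken from the commented-out lines. On failure it returns false so the loop can log the URL and move on:

// Minimal sketch: fetch one URL with hard timeouts so a dead server cannot block the run.
function fetchPage($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);  // give up if no connection after 5 s (assumed value)
    curl_setopt($ch, CURLOPT_TIMEOUT, 25);        // abort the whole transfer after 25 s (assumed value)
    $data = curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    if ($data === false || $httpcode < 200 || $httpcode >= 300) {
        return false;                             // let the caller skip this URL
    }
    return $data;
}

$handle = fopen("liste-chalet.txt", "r");
if ($handle) {
    while (($line = fgets($handle)) !== false) {
        $url = trim($line);
        $data = fetchPage($url);
        if ($data === false) {
            echo "Skipped: $url\n";               // log the failure and continue with the next URL
            continue;
        }
        // ... parse $data with Simple HTML DOM here ...
    }
    fclose($handle);
}

If the script runs through a web server rather than the CLI, PHP's own max_execution_time can also cut it off partway through the 150 URLs, so calling set_time_limit(0) at the top of the script may be needed as well.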