mirror of
https://git.lolcat.ca/lolcat/4get.git
synced 2024-11-14 03:49:48 +01:00
startpage captcha handle
This commit is contained in:
parent
ff06bc1f51
commit
4e4796bb71
|
@ -408,6 +408,8 @@ class startpage{
|
|||
//$html = file_get_contents("scraper/startpage.html");
|
||||
}
|
||||
|
||||
$this->detect_captcha($html);
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),$/m',
|
||||
|
@ -1057,6 +1059,8 @@ class startpage{
|
|||
}
|
||||
}
|
||||
|
||||
$this->detect_captcha($html);
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
|
@ -1186,6 +1190,8 @@ class startpage{
|
|||
}
|
||||
}
|
||||
|
||||
$this->detect_captcha($html);
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),$/m',
|
||||
|
@ -1326,6 +1332,8 @@ class startpage{
|
|||
}
|
||||
}
|
||||
|
||||
$this->detect_captcha($html);
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),$/m',
|
||||
|
@ -1526,4 +1534,46 @@ class startpage{
|
|||
$text
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Abort scraping when Startpage answered with its redirect page
 * instead of a result page.
 *
 * The markup is handed to $this->fuckhtml via load(), so the parser
 * holds $html after this call (NOTE(review): presumably this replaces
 * whatever document was loaded before -- confirm against the parser).
 *
 * Nothing happens unless the first <title> element reads exactly
 * "Redirecting...". When it does, an exception is always thrown; the
 * message depends on whether any <a> element's text mentions the
 * Startpage captcha URL.
 *
 * @param string $html Raw response body returned by Startpage.
 * @throws Exception "Startpage returned a captcha" when a link's text
 *                   contains the captcha URL.
 * @throws Exception "Startpage redirected the scraper to an unhandled page"
 *                   for any other redirect page.
 */
private function detect_captcha($html){
	
	$this->fuckhtml->load($html);
	
	$titles = $this->fuckhtml->getElementsByTagName("title");
	
	// anything that is not the redirect page is left for the caller to parse
	if(
		count($titles) === 0 ||
		$titles[0]["innerHTML"] != "Redirecting..."
	){
		
		return;
	}
	
	// redirect page: look for a link whose text points at the captcha
	$anchors = $this->fuckhtml->getElementsByTagName("a");
	
	foreach($anchors as $anchor){
		
		// presumably getTextContent() reduces the inner markup to plain text -- verify in the parser
		$link_text = $this->fuckhtml->getTextContent($anchor["innerHTML"]);
		
		if(strpos($link_text, "https://www.startpage.com/sp/captcha") !== false){
			
			throw new Exception("Startpage returned a captcha");
		}
	}
	
	// a redirect we do not know how to follow
	throw new Exception("Startpage redirected the scraper to an unhandled page");
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue