diff --git a/README.md b/README.md
index f81ea98..c5c4909 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ https://4get.ca
- DuckDuckGo
- Brave
- Yandex
+ - Google
- Mojeek
- Marginalia
- wiby
@@ -41,10 +42,12 @@ https://4get.ca
- DuckDuckgo
- Brave
- Yandex
+ - Google
4. News
- DuckDuckGo
- Brave
+ - Google
- Mojeek
5. Music
@@ -61,7 +64,7 @@ https://4get.ca
- YouTube
- SoundCloud
-More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list.
+More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping tab, a files tab and more music scrapers are also on my todo list.
# Installation
This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK.
@@ -190,6 +193,41 @@ services:
Replace relevant values and start with `docker-compose up -d`
+## Install on Caddy
+
+1. Install dependencies:
+
+`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
+
+2. Clone this repository where you want to host this from:
+
+`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
+
+3. Set permissions on the `icons` directory inside `4get`:
+
+`cd /var/www/4get/ && sudo chmod 777 -R icons/`
+
+4. Add an entry for 4get to your Caddyfile at `/etc/caddy/Caddyfile`:
+
+```sh
+4get.konakona.moe {
+ root * /var/www/4get
+ file_server
+ encode gzip
+ php_fastcgi unix//var/run/php/php8.2-fpm.sock {
+ index index.php
+ }
+ redir /{path}.php{query} 301
+ try_files {path} {path}.php
+}
+```
+
+Caddy deals with SSL certificates automatically, so you don't have to mess with anything. Also, if you need a reference, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
+
+5. Restart Caddy
+
+`sudo systemctl restart caddy`
+
# Encryption setup
I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour.
diff --git a/ami4get.php b/ami4get.php
index f2d48bf..5bb9273 100644
--- a/ami4get.php
+++ b/ami4get.php
@@ -5,8 +5,8 @@ header("Access-Control-Allow-Origin: *");
include "data/config.php";
-$bot_requests = apcu_fetch("captcha");
$real_requests = apcu_fetch("real_requests");
+$bot_requests = apcu_fetch("captcha_gen");
echo json_encode(
[
diff --git a/api.txt b/api.txt
index 70e179c..bc8ed05 100644
--- a/api.txt
+++ b/api.txt
@@ -68,8 +68,8 @@
+ Get the next page of results
- All API responses come with an array index named "nextpage". To get
- the next page of results, you must make another API call with &npt.
+ All API responses come with an array index named "npt". To get the
+ next page of results, you must make another API call with &npt.
Example ::
diff --git a/api/v1/web.php b/api/v1/web.php
index dc1a7cc..156e53e 100644
--- a/api/v1/web.php
+++ b/api/v1/web.php
@@ -39,10 +39,12 @@ if(
}
try{
- echo json_encode(
- $scraper->web($get),
- JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
- );
+
+ echo
+ json_encode(
+ $scraper->web($get),
+ JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
+ );
}catch(Exception $e){
diff --git a/data/config.php b/data/config.php
index f2ca214..0f14a42 100644
--- a/data/config.php
+++ b/data/config.php
@@ -5,7 +5,7 @@ class config{
// any parameters.
// 4get version. Please keep this updated
- const VERSION = 5;
+ const VERSION = 6;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
@@ -56,14 +56,22 @@ class config{
const INSTANCES = [
"https://4get.ca",
"https://4get.zzls.xyz",
+ "https://4getus.zzls.xyz",
"https://4get.silly.computer",
"https://4g.opnxng.com",
- "https://4get.konakona.moe"
+ "https://4get.konakona.moe",
+ "https://4get.lvkaszus.pl",
+ "https://4g.ggtyler.dev",
+ "https://4get.perennialte.ch",
+ "https://4get.sihj.net",
+ "https://4get.hbubli.cc",
+ "https://4get.plunked.party",
+ "https://4get.seitan-ayoub.lol"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
- const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0";
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
diff --git a/lib/captcha_gen.php b/lib/captcha_gen.php
index 6728747..abcab7a 100644
--- a/lib/captcha_gen.php
+++ b/lib/captcha_gen.php
@@ -7,6 +7,7 @@ class captcha{
// check if we want captcha
if(config::BOT_PROTECTION !== 1){
+ apcu_inc("real_requests");
if($output === true){
$frontend->loadheader(
$get,
@@ -45,6 +46,8 @@ class captcha{
}else{
// the cookie is OK! dont die() and give results
+ apcu_inc("real_requests");
+
if($output === true){
$frontend->loadheader(
$get,
@@ -175,6 +178,8 @@ class captcha{
apcu_inc($key, 1, $stupid, 86400);
+ apcu_inc("real_requests");
+
setcookie(
"pass",
$key,
@@ -197,7 +202,7 @@ class captcha{
$error = "
";
}
}
-
+
// get the positions for the answers
// will return between 3 and 6 answer positions
$range = range(0, 15);
diff --git a/lib/frontend.php b/lib/frontend.php
index 0f9f95d..bef12aa 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -25,7 +25,7 @@ class frontend{
if($theme != "Dark"){
- $replacements["style"] = ' ';
+ $replacements["style"] = ' ';
}else{
$replacements["style"] = "";
@@ -84,6 +84,8 @@ class frontend{
){
// bot detected !!
+ apcu_inc("captcha_gen");
+
$this->drawerror(
"Tshh, blocked!",
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running your own 4get instance or using the API .',
@@ -889,7 +891,7 @@ class frontend{
"ddg" => "DuckDuckGo",
"brave" => "Brave",
"yandex" => "Yandex",
- //"google" => "Google",
+ "google" => "Google",
"mojeek" => "Mojeek",
"marginalia" => "Marginalia",
"wiby" => "wiby"
@@ -921,8 +923,8 @@ class frontend{
//"fb" => "Facebook videos",
"ddg" => "DuckDuckGo",
"brave" => "Brave",
- "yandex" => "Yandex"
- //"google" => "Google"
+ "yandex" => "Yandex",
+ "google" => "Google"
]
];
break;
@@ -933,7 +935,7 @@ class frontend{
"option" => [
"ddg" => "DuckDuckGo",
"brave" => "Brave",
- //"google" => "Google",
+ "google" => "Google",
"mojeek" => "Mojeek"
]
];
diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php
index cb5d38d..bd161ce 100644
--- a/lib/fuckhtml.php
+++ b/lib/fuckhtml.php
@@ -15,7 +15,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){
- throw new Exception("(load) Supplied array doesn't contain a innerHTML index");
+ throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
}
$html = $html["innerHTML"];
}
@@ -35,6 +35,11 @@ class fuckhtml{
$this->strlen = strlen($this->html);
}
+ public function getloadedhtml(){
+
+ return $this->html;
+ }
+
public function getElementsByTagName(string $tagname){
$out = [];
@@ -46,7 +51,7 @@ class fuckhtml{
if($tagname == "*"){
- $tagname = '[^\/<>\s]+';
+ $tagname = '[A-Za-z0-9._-]+';
}else{
$tagname = preg_quote(strtolower($tagname));
@@ -126,7 +131,7 @@ class fuckhtml{
}
);
- // computer the indent level for each element
+ // compute the indent level for each element
$level = [];
$count = count($out);
@@ -314,7 +319,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){
- throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index");
+ throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
}
$html = $html["innerHTML"];
}
@@ -441,4 +446,27 @@ class fuckhtml{
return json_decode($json_out, true);
}
+
+ public function parseJsString($string){
+
+ return
+ preg_replace_callback(
+ '/\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}/',
+ function($match){
+
+ if($match[0][1] == "u"){
+
+ return json_decode('"' . $match[0] . '"');
+ }else{
+
+ return mb_convert_encoding(
+ stripcslashes($match[0]),
+ "utf-8",
+ "windows-1252"
+ );
+ }
+ },
+ $string
+ );
+ }
}
diff --git a/robots.txt b/robots.txt
index 3e608cc..6c10c2a 100644
--- a/robots.txt
+++ b/robots.txt
@@ -24,5 +24,5 @@
User-agent: *
Disallow:
-host: 4get.ca
-sitemap: https://4get.ca/sitemap.xml
+Host: 4get.ca
+Sitemap: https://4get.ca/sitemap
diff --git a/scraper/brave.php b/scraper/brave.php
index bd1cd80..8be55ac 100644
--- a/scraper/brave.php
+++ b/scraper/brave.php
@@ -857,7 +857,9 @@ class brave{
// parse ratings
if(
isset($info["ratings"]) &&
- $info["ratings"] != "void 0"
+ $info["ratings"] != "void 0" &&
+ is_array($info["ratings"]) &&
+ count($info["ratings"]) !== 0
){
$description[] = [
@@ -1183,7 +1185,7 @@ class brave{
"title" => $news["title"],
"author" => null,
"description" => $news["description"],
- "date" => !isset($news["age"]) || $news["age"] == "void 0" ? null : strtotime($news["age"]),
+ "date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]),
"thumb" => $thumb,
"url" => $news["url"]
];
diff --git a/scraper/ddg.php b/scraper/ddg.php
index 2d737ba..4a0d11f 100644
--- a/scraper/ddg.php
+++ b/scraper/ddg.php
@@ -545,8 +545,6 @@ class ddg{
public function web($get){
- $proxy = null;
-
if($get["npt"]){
[$jsgrep, $proxy] = $this->backend->get($get["npt"], "web");
diff --git a/scraper/google.php b/scraper/google.php
index 055d12a..bf2b0e4 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -16,713 +16,496 @@ class google{
public function getfilters($page){
+ $base = [
+ "country" => [ // gl=
+ "display" => "Country",
+ "option" => [
+ "any" => "Instance's country",
+ "af" => "Afghanistan",
+ "al" => "Albania",
+ "dz" => "Algeria",
+ "as" => "American Samoa",
+ "ad" => "Andorra",
+ "ao" => "Angola",
+ "ai" => "Anguilla",
+ "aq" => "Antarctica",
+ "ag" => "Antigua and Barbuda",
+ "ar" => "Argentina",
+ "am" => "Armenia",
+ "aw" => "Aruba",
+ "au" => "Australia",
+ "at" => "Austria",
+ "az" => "Azerbaijan",
+ "bs" => "Bahamas",
+ "bh" => "Bahrain",
+ "bd" => "Bangladesh",
+ "bb" => "Barbados",
+ "by" => "Belarus",
+ "be" => "Belgium",
+ "bz" => "Belize",
+ "bj" => "Benin",
+ "bm" => "Bermuda",
+ "bt" => "Bhutan",
+ "bo" => "Bolivia",
+ "ba" => "Bosnia and Herzegovina",
+ "bw" => "Botswana",
+ "bv" => "Bouvet Island",
+ "br" => "Brazil",
+ "io" => "British Indian Ocean Territory",
+ "bn" => "Brunei Darussalam",
+ "bg" => "Bulgaria",
+ "bf" => "Burkina Faso",
+ "bi" => "Burundi",
+ "kh" => "Cambodia",
+ "cm" => "Cameroon",
+ "ca" => "Canada",
+ "cv" => "Cape Verde",
+ "ky" => "Cayman Islands",
+ "cf" => "Central African Republic",
+ "td" => "Chad",
+ "cl" => "Chile",
+ "cn" => "China",
+ "cx" => "Christmas Island",
+ "cc" => "Cocos (Keeling) Islands",
+ "co" => "Colombia",
+ "km" => "Comoros",
+ "cg" => "Congo",
+ "cd" => "Congo, the Democratic Republic",
+ "ck" => "Cook Islands",
+ "cr" => "Costa Rica",
+ "ci" => "Cote D'ivoire",
+ "hr" => "Croatia",
+ "cu" => "Cuba",
+ "cy" => "Cyprus",
+ "cz" => "Czech Republic",
+ "dk" => "Denmark",
+ "dj" => "Djibouti",
+ "dm" => "Dominica",
+ "do" => "Dominican Republic",
+ "ec" => "Ecuador",
+ "eg" => "Egypt",
+ "sv" => "El Salvador",
+ "gq" => "Equatorial Guinea",
+ "er" => "Eritrea",
+ "ee" => "Estonia",
+ "et" => "Ethiopia",
+ "fk" => "Falkland Islands (Malvinas)",
+ "fo" => "Faroe Islands",
+ "fj" => "Fiji",
+ "fi" => "Finland",
+ "fr" => "France",
+ "gf" => "French Guiana",
+ "pf" => "French Polynesia",
+ "tf" => "French Southern Territories",
+ "ga" => "Gabon",
+ "gm" => "Gambia",
+ "ge" => "Georgia",
+ "de" => "Germany",
+ "gh" => "Ghana",
+ "gi" => "Gibraltar",
+ "gr" => "Greece",
+ "gl" => "Greenland",
+ "gd" => "Grenada",
+ "gp" => "Guadeloupe",
+ "gu" => "Guam",
+ "gt" => "Guatemala",
+ "gn" => "Guinea",
+ "gw" => "Guinea-Bissau",
+ "gy" => "Guyana",
+ "ht" => "Haiti",
+ "hm" => "Heard Island and Mcdonald Islands",
+ "va" => "Holy See (Vatican City State)",
+ "hn" => "Honduras",
+ "hk" => "Hong Kong",
+ "hu" => "Hungary",
+ "is" => "Iceland",
+ "in" => "India",
+ "id" => "Indonesia",
+ "ir" => "Iran, Islamic Republic",
+ "iq" => "Iraq",
+ "ie" => "Ireland",
+ "il" => "Israel",
+ "it" => "Italy",
+ "jm" => "Jamaica",
+ "jp" => "Japan",
+ "jo" => "Jordan",
+ "kz" => "Kazakhstan",
+ "ke" => "Kenya",
+ "ki" => "Kiribati",
+ "kp" => "Korea, Democratic People's Republic",
+ "kr" => "Korea, Republic",
+ "kw" => "Kuwait",
+ "kg" => "Kyrgyzstan",
+ "la" => "Lao People's Democratic Republic",
+ "lv" => "Latvia",
+ "lb" => "Lebanon",
+ "ls" => "Lesotho",
+ "lr" => "Liberia",
+ "ly" => "Libyan Arab Jamahiriya",
+ "li" => "Liechtenstein",
+ "lt" => "Lithuania",
+ "lu" => "Luxembourg",
+ "mo" => "Macao",
+ "mk" => "Macedonia, the Former Yugosalv Republic",
+ "mg" => "Madagascar",
+ "mw" => "Malawi",
+ "my" => "Malaysia",
+ "mv" => "Maldives",
+ "ml" => "Mali",
+ "mt" => "Malta",
+ "mh" => "Marshall Islands",
+ "mq" => "Martinique",
+ "mr" => "Mauritania",
+ "mu" => "Mauritius",
+ "yt" => "Mayotte",
+ "mx" => "Mexico",
+ "fm" => "Micronesia, Federated States",
+ "md" => "Moldova, Republic",
+ "mc" => "Monaco",
+ "mn" => "Mongolia",
+ "ms" => "Montserrat",
+ "ma" => "Morocco",
+ "mz" => "Mozambique",
+ "mm" => "Myanmar",
+ "na" => "Namibia",
+ "nr" => "Nauru",
+ "np" => "Nepal",
+ "nl" => "Netherlands",
+ "an" => "Netherlands Antilles",
+ "nc" => "New Caledonia",
+ "nz" => "New Zealand",
+ "ni" => "Nicaragua",
+ "ne" => "Niger",
+ "ng" => "Nigeria",
+ "nu" => "Niue",
+ "nf" => "Norfolk Island",
+ "mp" => "Northern Mariana Islands",
+ "no" => "Norway",
+ "om" => "Oman",
+ "pk" => "Pakistan",
+ "pw" => "Palau",
+ "ps" => "Palestinian Territory, Occupied",
+ "pa" => "Panama",
+ "pg" => "Papua New Guinea",
+ "py" => "Paraguay",
+ "pe" => "Peru",
+ "ph" => "Philippines",
+ "pn" => "Pitcairn",
+ "pl" => "Poland",
+ "pt" => "Portugal",
+ "pr" => "Puerto Rico",
+ "qa" => "Qatar",
+ "re" => "Reunion",
+ "ro" => "Romania",
+ "ru" => "Russian Federation",
+ "rw" => "Rwanda",
+ "sh" => "Saint Helena",
+ "kn" => "Saint Kitts and Nevis",
+ "lc" => "Saint Lucia",
+ "pm" => "Saint Pierre and Miquelon",
+ "vc" => "Saint Vincent and the Grenadines",
+ "ws" => "Samoa",
+ "sm" => "San Marino",
+ "st" => "Sao Tome and Principe",
+ "sa" => "Saudi Arabia",
+ "sn" => "Senegal",
+ "cs" => "Serbia and Montenegro",
+ "sc" => "Seychelles",
+ "sl" => "Sierra Leone",
+ "sg" => "Singapore",
+ "sk" => "Slovakia",
+ "si" => "Slovenia",
+ "sb" => "Solomon Islands",
+ "so" => "Somalia",
+ "za" => "South Africa",
+ "gs" => "South Georgia and the South Sandwich Islands",
+ "es" => "Spain",
+ "lk" => "Sri Lanka",
+ "sd" => "Sudan",
+ "sr" => "Suriname",
+ "sj" => "Svalbard and Jan Mayen",
+ "sz" => "Swaziland",
+ "se" => "Sweden",
+ "ch" => "Switzerland",
+ "sy" => "Syrian Arab Republic",
+ "tw" => "Taiwan, Province of China",
+ "tj" => "Tajikistan",
+ "tz" => "Tanzania, United Republic",
+ "th" => "Thailand",
+ "tl" => "Timor-Leste",
+ "tg" => "Togo",
+ "tk" => "Tokelau",
+ "to" => "Tonga",
+ "tt" => "Trinidad and Tobago",
+ "tn" => "Tunisia",
+ "tr" => "Turkey",
+ "tm" => "Turkmenistan",
+ "tc" => "Turks and Caicos Islands",
+ "tv" => "Tuvalu",
+ "ug" => "Uganda",
+ "ua" => "Ukraine",
+ "ae" => "United Arab Emirates",
+ "uk" => "United Kingdom",
+ "us" => "United States",
+ "um" => "United States Minor Outlying Islands",
+ "uy" => "Uruguay",
+ "uz" => "Uzbekistan",
+ "vu" => "Vanuatu",
+ "ve" => "Venezuela",
+ "vn" => "Viet Nam",
+ "vg" => "Virgin Islands, British",
+ "vi" => "Virgin Islands, U.S.",
+ "wf" => "Wallis and Futuna",
+ "eh" => "Western Sahara",
+ "ye" => "Yemen",
+ "zm" => "Zambia",
+ "zw" => "Zimbabwe"
+ ]
+ ],
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes", // safe=active
+ "no" => "No" // safe=off
+ ]
+ ],
+ "lang" => [ // lr= (prefix lang with "lang_")
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "ar" => "Arabic",
+ "bg" => "Bulgarian",
+ "ca" => "Catalan",
+ "cs" => "Czech",
+ "da" => "Danish",
+ "de" => "German",
+ "el" => "Greek",
+ "en" => "English",
+ "es" => "Spanish",
+ "et" => "Estonian",
+ "fi" => "Finnish",
+ "fr" => "French",
+ "hr" => "Croatian",
+ "hu" => "Hungarian",
+ "id" => "Indonesian",
+ "is" => "Icelandic",
+ "it" => "Italian",
+ "iw" => "Hebrew",
+ "ja" => "Japanese",
+ "ko" => "Korean",
+ "lt" => "Lithuanian",
+ "lv" => "Latvian",
+ "nl" => "Dutch",
+ "no" => "Norwegian",
+ "pl" => "Polish",
+ "pt" => "Portuguese",
+ "ro" => "Romanian",
+ "ru" => "Russian",
+ "sk" => "Slovak",
+ "sl" => "Slovenian",
+ "sr" => "Serbian",
+ "sv" => "Swedish",
+ "tr" => "Turkish",
+ "zh-CN" => "Chinese (Simplified)",
+ "zh-TW" => "Chinese (Traditional)"
+ ]
+ ]
+ ];
+
switch($page){
case "web":
- case "videos":
- case "news":
- return [
- "country" => [ // gl=
- "display" => "Country",
- "option" => [
- "any" => "Instance's country",
- "af" => "Afghanistan",
- "al" => "Albania",
- "dz" => "Algeria",
- "as" => "American Samoa",
- "ad" => "Andorra",
- "ao" => "Angola",
- "ai" => "Anguilla",
- "aq" => "Antarctica",
- "ag" => "Antigua and Barbuda",
- "ar" => "Argentina",
- "am" => "Armenia",
- "aw" => "Aruba",
- "au" => "Australia",
- "at" => "Austria",
- "az" => "Azerbaijan",
- "bs" => "Bahamas",
- "bh" => "Bahrain",
- "bd" => "Bangladesh",
- "bb" => "Barbados",
- "by" => "Belarus",
- "be" => "Belgium",
- "bz" => "Belize",
- "bj" => "Benin",
- "bm" => "Bermuda",
- "bt" => "Bhutan",
- "bo" => "Bolivia",
- "ba" => "Bosnia and Herzegovina",
- "bw" => "Botswana",
- "bv" => "Bouvet Island",
- "br" => "Brazil",
- "io" => "British Indian Ocean Territory",
- "bn" => "Brunei Darussalam",
- "bg" => "Bulgaria",
- "bf" => "Burkina Faso",
- "bi" => "Burundi",
- "kh" => "Cambodia",
- "cm" => "Cameroon",
- "ca" => "Canada",
- "cv" => "Cape Verde",
- "ky" => "Cayman Islands",
- "cf" => "Central African Republic",
- "td" => "Chad",
- "cl" => "Chile",
- "cn" => "China",
- "cx" => "Christmas Island",
- "cc" => "Cocos (Keeling) Islands",
- "co" => "Colombia",
- "km" => "Comoros",
- "cg" => "Congo",
- "cd" => "Congo, the Democratic Republic",
- "ck" => "Cook Islands",
- "cr" => "Costa Rica",
- "ci" => "Cote D'ivoire",
- "hr" => "Croatia",
- "cu" => "Cuba",
- "cy" => "Cyprus",
- "cz" => "Czech Republic",
- "dk" => "Denmark",
- "dj" => "Djibouti",
- "dm" => "Dominica",
- "do" => "Dominican Republic",
- "ec" => "Ecuador",
- "eg" => "Egypt",
- "sv" => "El Salvador",
- "gq" => "Equatorial Guinea",
- "er" => "Eritrea",
- "ee" => "Estonia",
- "et" => "Ethiopia",
- "fk" => "Falkland Islands (Malvinas)",
- "fo" => "Faroe Islands",
- "fj" => "Fiji",
- "fi" => "Finland",
- "fr" => "France",
- "gf" => "French Guiana",
- "pf" => "French Polynesia",
- "tf" => "French Southern Territories",
- "ga" => "Gabon",
- "gm" => "Gambia",
- "ge" => "Georgia",
- "de" => "Germany",
- "gh" => "Ghana",
- "gi" => "Gibraltar",
- "gr" => "Greece",
- "gl" => "Greenland",
- "gd" => "Grenada",
- "gp" => "Guadeloupe",
- "gu" => "Guam",
- "gt" => "Guatemala",
- "gn" => "Guinea",
- "gw" => "Guinea-Bissau",
- "gy" => "Guyana",
- "ht" => "Haiti",
- "hm" => "Heard Island and Mcdonald Islands",
- "va" => "Holy See (Vatican City State)",
- "hn" => "Honduras",
- "hk" => "Hong Kong",
- "hu" => "Hungary",
- "is" => "Iceland",
- "in" => "India",
- "id" => "Indonesia",
- "ir" => "Iran, Islamic Republic",
- "iq" => "Iraq",
- "ie" => "Ireland",
- "il" => "Israel",
- "it" => "Italy",
- "jm" => "Jamaica",
- "jp" => "Japan",
- "jo" => "Jordan",
- "kz" => "Kazakhstan",
- "ke" => "Kenya",
- "ki" => "Kiribati",
- "kp" => "Korea, Democratic People's Republic",
- "kr" => "Korea, Republic",
- "kw" => "Kuwait",
- "kg" => "Kyrgyzstan",
- "la" => "Lao People's Democratic Republic",
- "lv" => "Latvia",
- "lb" => "Lebanon",
- "ls" => "Lesotho",
- "lr" => "Liberia",
- "ly" => "Libyan Arab Jamahiriya",
- "li" => "Liechtenstein",
- "lt" => "Lithuania",
- "lu" => "Luxembourg",
- "mo" => "Macao",
- "mk" => "Macedonia, the Former Yugosalv Republic",
- "mg" => "Madagascar",
- "mw" => "Malawi",
- "my" => "Malaysia",
- "mv" => "Maldives",
- "ml" => "Mali",
- "mt" => "Malta",
- "mh" => "Marshall Islands",
- "mq" => "Martinique",
- "mr" => "Mauritania",
- "mu" => "Mauritius",
- "yt" => "Mayotte",
- "mx" => "Mexico",
- "fm" => "Micronesia, Federated States",
- "md" => "Moldova, Republic",
- "mc" => "Monaco",
- "mn" => "Mongolia",
- "ms" => "Montserrat",
- "ma" => "Morocco",
- "mz" => "Mozambique",
- "mm" => "Myanmar",
- "na" => "Namibia",
- "nr" => "Nauru",
- "np" => "Nepal",
- "nl" => "Netherlands",
- "an" => "Netherlands Antilles",
- "nc" => "New Caledonia",
- "nz" => "New Zealand",
- "ni" => "Nicaragua",
- "ne" => "Niger",
- "ng" => "Nigeria",
- "nu" => "Niue",
- "nf" => "Norfolk Island",
- "mp" => "Northern Mariana Islands",
- "no" => "Norway",
- "om" => "Oman",
- "pk" => "Pakistan",
- "pw" => "Palau",
- "ps" => "Palestinian Territory, Occupied",
- "pa" => "Panama",
- "pg" => "Papua New Guinea",
- "py" => "Paraguay",
- "pe" => "Peru",
- "ph" => "Philippines",
- "pn" => "Pitcairn",
- "pl" => "Poland",
- "pt" => "Portugal",
- "pr" => "Puerto Rico",
- "qa" => "Qatar",
- "re" => "Reunion",
- "ro" => "Romania",
- "ru" => "Russian Federation",
- "rw" => "Rwanda",
- "sh" => "Saint Helena",
- "kn" => "Saint Kitts and Nevis",
- "lc" => "Saint Lucia",
- "pm" => "Saint Pierre and Miquelon",
- "vc" => "Saint Vincent and the Grenadines",
- "ws" => "Samoa",
- "sm" => "San Marino",
- "st" => "Sao Tome and Principe",
- "sa" => "Saudi Arabia",
- "sn" => "Senegal",
- "cs" => "Serbia and Montenegro",
- "sc" => "Seychelles",
- "sl" => "Sierra Leone",
- "sg" => "Singapore",
- "sk" => "Slovakia",
- "si" => "Slovenia",
- "sb" => "Solomon Islands",
- "so" => "Somalia",
- "za" => "South Africa",
- "gs" => "South Georgia and the South Sandwich Islands",
- "es" => "Spain",
- "lk" => "Sri Lanka",
- "sd" => "Sudan",
- "sr" => "Suriname",
- "sj" => "Svalbard and Jan Mayen",
- "sz" => "Swaziland",
- "se" => "Sweden",
- "ch" => "Switzerland",
- "sy" => "Syrian Arab Republic",
- "tw" => "Taiwan, Province of China",
- "tj" => "Tajikistan",
- "tz" => "Tanzania, United Republic",
- "th" => "Thailand",
- "tl" => "Timor-Leste",
- "tg" => "Togo",
- "tk" => "Tokelau",
- "to" => "Tonga",
- "tt" => "Trinidad and Tobago",
- "tn" => "Tunisia",
- "tr" => "Turkey",
- "tm" => "Turkmenistan",
- "tc" => "Turks and Caicos Islands",
- "tv" => "Tuvalu",
- "ug" => "Uganda",
- "ua" => "Ukraine",
- "ae" => "United Arab Emirates",
- "uk" => "United Kingdom",
- "us" => "United States",
- "um" => "United States Minor Outlying Islands",
- "uy" => "Uruguay",
- "uz" => "Uzbekistan",
- "vu" => "Vanuatu",
- "ve" => "Venezuela",
- "vn" => "Viet Nam",
- "vg" => "Virgin Islands, British",
- "vi" => "Virgin Islands, U.S.",
- "wf" => "Wallis and Futuna",
- "eh" => "Western Sahara",
- "ye" => "Yemen",
- "zm" => "Zambia",
- "zw" => "Zimbabwe"
+ return array_merge(
+ $base,
+ [
+ "newer" => [ // &sort=review-date:r:20090301:20090430
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
]
- ],
- "nsfw" => [
- "display" => "NSFW",
- "option" => [
- "yes" => "Yes", // safe=active
- "no" => "No" // safe=off
- ]
- ],
- "lang" => [ // lr= (prefix lang with "lang_")
- "display" => "Language",
- "option" => [
- "any" => "Any language",
- "ar" => "Arabic",
- "bg" => "Bulgarian",
- "ca" => "Catalan",
- "cs" => "Czech",
- "da" => "Danish",
- "de" => "German",
- "el" => "Greek",
- "en" => "English",
- "es" => "Spanish",
- "et" => "Estonian",
- "fi" => "Finnish",
- "fr" => "French",
- "hr" => "Croatian",
- "hu" => "Hungarian",
- "id" => "Indonesian",
- "is" => "Icelandic",
- "it" => "Italian",
- "iw" => "Hebrew",
- "ja" => "Japanese",
- "ko" => "Korean",
- "lt" => "Lithuanian",
- "lv" => "Latvian",
- "nl" => "Dutch",
- "no" => "Norwegian",
- "pl" => "Polish",
- "pt" => "Portuguese",
- "ro" => "Romanian",
- "ru" => "Russian",
- "sk" => "Slovak",
- "sl" => "Slovenian",
- "sr" => "Serbian",
- "sv" => "Swedish",
- "tr" => "Turkish",
- "zh-CN" => "Chinese (Simplified)",
- "zh-TW" => "Chinese (Traditional)"
- ]
- ],
- "newer" => [ // &sort=review-date:r:20090301:20090430
- "display" => "Newer than",
- "option" => "_DATE"
- ],
- "older" => [
- "display" => "Older than",
- "option" => "_DATE"
]
- ];
+ );
break;
case "images":
- return [
- "country" => [ // gl=
- "display" => "Country",
- "option" => [
- "any" => "Instance's country",
- "af" => "Afghanistan",
- "al" => "Albania",
- "dz" => "Algeria",
- "as" => "American Samoa",
- "ad" => "Andorra",
- "ao" => "Angola",
- "ai" => "Anguilla",
- "aq" => "Antarctica",
- "ag" => "Antigua and Barbuda",
- "ar" => "Argentina",
- "am" => "Armenia",
- "aw" => "Aruba",
- "au" => "Australia",
- "at" => "Austria",
- "az" => "Azerbaijan",
- "bs" => "Bahamas",
- "bh" => "Bahrain",
- "bd" => "Bangladesh",
- "bb" => "Barbados",
- "by" => "Belarus",
- "be" => "Belgium",
- "bz" => "Belize",
- "bj" => "Benin",
- "bm" => "Bermuda",
- "bt" => "Bhutan",
- "bo" => "Bolivia",
- "ba" => "Bosnia and Herzegovina",
- "bw" => "Botswana",
- "bv" => "Bouvet Island",
- "br" => "Brazil",
- "io" => "British Indian Ocean Territory",
- "bn" => "Brunei Darussalam",
- "bg" => "Bulgaria",
- "bf" => "Burkina Faso",
- "bi" => "Burundi",
- "kh" => "Cambodia",
- "cm" => "Cameroon",
- "ca" => "Canada",
- "cv" => "Cape Verde",
- "ky" => "Cayman Islands",
- "cf" => "Central African Republic",
- "td" => "Chad",
- "cl" => "Chile",
- "cn" => "China",
- "cx" => "Christmas Island",
- "cc" => "Cocos (Keeling) Islands",
- "co" => "Colombia",
- "km" => "Comoros",
- "cg" => "Congo",
- "cd" => "Congo, the Democratic Republic",
- "ck" => "Cook Islands",
- "cr" => "Costa Rica",
- "ci" => "Cote D'ivoire",
- "hr" => "Croatia",
- "cu" => "Cuba",
- "cy" => "Cyprus",
- "cz" => "Czech Republic",
- "dk" => "Denmark",
- "dj" => "Djibouti",
- "dm" => "Dominica",
- "do" => "Dominican Republic",
- "ec" => "Ecuador",
- "eg" => "Egypt",
- "sv" => "El Salvador",
- "gq" => "Equatorial Guinea",
- "er" => "Eritrea",
- "ee" => "Estonia",
- "et" => "Ethiopia",
- "fk" => "Falkland Islands (Malvinas)",
- "fo" => "Faroe Islands",
- "fj" => "Fiji",
- "fi" => "Finland",
- "fr" => "France",
- "gf" => "French Guiana",
- "pf" => "French Polynesia",
- "tf" => "French Southern Territories",
- "ga" => "Gabon",
- "gm" => "Gambia",
- "ge" => "Georgia",
- "de" => "Germany",
- "gh" => "Ghana",
- "gi" => "Gibraltar",
- "gr" => "Greece",
- "gl" => "Greenland",
- "gd" => "Grenada",
- "gp" => "Guadeloupe",
- "gu" => "Guam",
- "gt" => "Guatemala",
- "gn" => "Guinea",
- "gw" => "Guinea-Bissau",
- "gy" => "Guyana",
- "ht" => "Haiti",
- "hm" => "Heard Island and Mcdonald Islands",
- "va" => "Holy See (Vatican City State)",
- "hn" => "Honduras",
- "hk" => "Hong Kong",
- "hu" => "Hungary",
- "is" => "Iceland",
- "in" => "India",
- "id" => "Indonesia",
- "ir" => "Iran, Islamic Republic",
- "iq" => "Iraq",
- "ie" => "Ireland",
- "il" => "Israel",
- "it" => "Italy",
- "jm" => "Jamaica",
- "jp" => "Japan",
- "jo" => "Jordan",
- "kz" => "Kazakhstan",
- "ke" => "Kenya",
- "ki" => "Kiribati",
- "kp" => "Korea, Democratic People's Republic",
- "kr" => "Korea, Republic",
- "kw" => "Kuwait",
- "kg" => "Kyrgyzstan",
- "la" => "Lao People's Democratic Republic",
- "lv" => "Latvia",
- "lb" => "Lebanon",
- "ls" => "Lesotho",
- "lr" => "Liberia",
- "ly" => "Libyan Arab Jamahiriya",
- "li" => "Liechtenstein",
- "lt" => "Lithuania",
- "lu" => "Luxembourg",
- "mo" => "Macao",
- "mk" => "Macedonia, the Former Yugosalv Republic",
- "mg" => "Madagascar",
- "mw" => "Malawi",
- "my" => "Malaysia",
- "mv" => "Maldives",
- "ml" => "Mali",
- "mt" => "Malta",
- "mh" => "Marshall Islands",
- "mq" => "Martinique",
- "mr" => "Mauritania",
- "mu" => "Mauritius",
- "yt" => "Mayotte",
- "mx" => "Mexico",
- "fm" => "Micronesia, Federated States",
- "md" => "Moldova, Republic",
- "mc" => "Monaco",
- "mn" => "Mongolia",
- "ms" => "Montserrat",
- "ma" => "Morocco",
- "mz" => "Mozambique",
- "mm" => "Myanmar",
- "na" => "Namibia",
- "nr" => "Nauru",
- "np" => "Nepal",
- "nl" => "Netherlands",
- "an" => "Netherlands Antilles",
- "nc" => "New Caledonia",
- "nz" => "New Zealand",
- "ni" => "Nicaragua",
- "ne" => "Niger",
- "ng" => "Nigeria",
- "nu" => "Niue",
- "nf" => "Norfolk Island",
- "mp" => "Northern Mariana Islands",
- "no" => "Norway",
- "om" => "Oman",
- "pk" => "Pakistan",
- "pw" => "Palau",
- "ps" => "Palestinian Territory, Occupied",
- "pa" => "Panama",
- "pg" => "Papua New Guinea",
- "py" => "Paraguay",
- "pe" => "Peru",
- "ph" => "Philippines",
- "pn" => "Pitcairn",
- "pl" => "Poland",
- "pt" => "Portugal",
- "pr" => "Puerto Rico",
- "qa" => "Qatar",
- "re" => "Reunion",
- "ro" => "Romania",
- "ru" => "Russian Federation",
- "rw" => "Rwanda",
- "sh" => "Saint Helena",
- "kn" => "Saint Kitts and Nevis",
- "lc" => "Saint Lucia",
- "pm" => "Saint Pierre and Miquelon",
- "vc" => "Saint Vincent and the Grenadines",
- "ws" => "Samoa",
- "sm" => "San Marino",
- "st" => "Sao Tome and Principe",
- "sa" => "Saudi Arabia",
- "sn" => "Senegal",
- "cs" => "Serbia and Montenegro",
- "sc" => "Seychelles",
- "sl" => "Sierra Leone",
- "sg" => "Singapore",
- "sk" => "Slovakia",
- "si" => "Slovenia",
- "sb" => "Solomon Islands",
- "so" => "Somalia",
- "za" => "South Africa",
- "gs" => "South Georgia and the South Sandwich Islands",
- "es" => "Spain",
- "lk" => "Sri Lanka",
- "sd" => "Sudan",
- "sr" => "Suriname",
- "sj" => "Svalbard and Jan Mayen",
- "sz" => "Swaziland",
- "se" => "Sweden",
- "ch" => "Switzerland",
- "sy" => "Syrian Arab Republic",
- "tw" => "Taiwan, Province of China",
- "tj" => "Tajikistan",
- "tz" => "Tanzania, United Republic",
- "th" => "Thailand",
- "tl" => "Timor-Leste",
- "tg" => "Togo",
- "tk" => "Tokelau",
- "to" => "Tonga",
- "tt" => "Trinidad and Tobago",
- "tn" => "Tunisia",
- "tr" => "Turkey",
- "tm" => "Turkmenistan",
- "tc" => "Turks and Caicos Islands",
- "tv" => "Tuvalu",
- "ug" => "Uganda",
- "ua" => "Ukraine",
- "ae" => "United Arab Emirates",
- "uk" => "United Kingdom",
- "us" => "United States",
- "um" => "United States Minor Outlying Islands",
- "uy" => "Uruguay",
- "uz" => "Uzbekistan",
- "vu" => "Vanuatu",
- "ve" => "Venezuela",
- "vn" => "Viet Nam",
- "vg" => "Virgin Islands, British",
- "vi" => "Virgin Islands, U.S.",
- "wf" => "Wallis and Futuna",
- "eh" => "Western Sahara",
- "ye" => "Yemen",
- "zm" => "Zambia",
- "zw" => "Zimbabwe"
- ]
- ],
- "nsfw" => [
- "display" => "NSFW",
- "option" => [
- "yes" => "Yes", // safe=active
- "no" => "No" // safe=off
- ]
- ],
- "lang" => [ // lr= (prefix lang with "lang_")
- "display" => "Language",
- "option" => [
- "any" => "Any language",
- "ar" => "Arabic",
- "bg" => "Bulgarian",
- "ca" => "Catalan",
- "cs" => "Czech",
- "da" => "Danish",
- "de" => "German",
- "el" => "Greek",
- "en" => "English",
- "es" => "Spanish",
- "et" => "Estonian",
- "fi" => "Finnish",
- "fr" => "French",
- "hr" => "Croatian",
- "hu" => "Hungarian",
- "id" => "Indonesian",
- "is" => "Icelandic",
- "it" => "Italian",
- "iw" => "Hebrew",
- "ja" => "Japanese",
- "ko" => "Korean",
- "lt" => "Lithuanian",
- "lv" => "Latvian",
- "nl" => "Dutch",
- "no" => "Norwegian",
- "pl" => "Polish",
- "pt" => "Portuguese",
- "ro" => "Romanian",
- "ru" => "Russian",
- "sk" => "Slovak",
- "sl" => "Slovenian",
- "sr" => "Serbian",
- "sv" => "Swedish",
- "tr" => "Turkish",
- "zh-CN" => "Chinese (Simplified)",
- "zh-TW" => "Chinese (Traditional)"
- ]
- ],
- "time" => [ // tbs=qrd:
- "display" => "Time posted",
- "option" => [
- "any" => "Any time",
- "d" => "Past 24 hours",
- "w" => "Past week",
- "m" => "Past month",
- "y" => "Past year"
- ]
- ],
- "size" => [
- "display" => "Size",
- "option" => [
- // tbs=isz:
- "any" => "Any size",
- "l" => "Large",
- "m" => "Medium",
- "i" => "Icon",
- // from here
- // tbz:lt,islt:
- "qsvga" => "Larger than 400x300",
- "vga" => "Larger than 640x480",
- "qsvga" => "Larger than 800x600",
- "xga" => "Larger than 1024x768",
- "2mp" => "Larger than 2MP",
- "4mp" => "Larger than 4MP",
- "6mp" => "Larger than 6MP",
- "8mp" => "Larger than 8MP",
- "10mp" => "Larger than 10MP",
- "12mp" => "Larger than 12MP",
- "15mp" => "Larger than 15MP",
- "20mp" => "Larger than 20MP",
- "40mp" => "Larger than 40MP",
- "70mp" => "Larger than 70MP"
- ]
- ],
- "ratio" => [ // tbs=iar:
- "display" => "Aspect ratio",
- "option" => [
- "any" => "Any ratio",
- "t" => "Tall",
- "s" => "Square",
- "w" => "Wide",
- "xw" => "Panoramic"
- ]
- ],
- "color" => [ // tbs=ic:
- "display" => "Color",
- "option" => [
- "any" => "Any color",
- "color" => "Full color",
- "gray" => "Black & white",
- "trans" => "Transparent",
- // from there, its ic:specific,isc:
- "red" => "Red",
- "orange" => "Orange",
- "yellow" => "Yellow",
- "green" => "Green",
- "teal" => "Teal",
- "blue" => "Blue",
- "purple" => "Purple",
- "pink" => "Pink",
- "white" => "White",
- "gray" => "Gray",
- "black" => "Black",
- "brown" => "Brown"
- ]
- ],
- "type" => [ // tbs=itp:
- "display" => "Type",
- "option" => [
- "any" => "Any type",
- "face" => "Faces",
- "clipart" => "Clip Art",
- "lineart" => "Line Drawing",
- "stock" => "Stock",
- "animated" => "Animated"
- ]
- ],
- "format" => [ // tbs=ift:
- "display" => "Format",
- "option" => [
- "any" => "Any format",
- "jpg" => "JPG",
- "gif" => "GIF",
- "png" => "PNG",
- "bmp" => "BMP",
- "svg" => "SVG",
- "webp" => "WEBP",
- "ico" => "ICO",
- "craw" => "RAW"
- ]
- ],
- "rights" => [ // tbs=il:
- "display" => "Usage rights",
- "option" => [
- "any" => "Any license",
- "cl" => "Creative Commons licenses",
- "ol" => "Commercial & other licenses"
+ return array_merge(
+ $base,
+ [
+ "time" => [ // tbs=qdr:
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "d" => "Past 24 hours",
+ "w" => "Past week",
+ "m" => "Past month",
+ "y" => "Past year"
+ ]
+ ],
+ "size" => [
+ "display" => "Size",
+ "option" => [
+ // tbs=isz:
+ "any" => "Any size",
+ "l" => "Large",
+ "m" => "Medium",
+ "i" => "Icon",
+ // from here
+ // tbz:lt,islt:
+ // keys must be unique: a repeated key silently replaces the
+ // earlier label, so 800x600 gets its own "svga" key
+ "qsvga" => "Larger than 400x300",
+ "vga" => "Larger than 640x480",
+ "svga" => "Larger than 800x600",
+ "xga" => "Larger than 1024x768",
+ "2mp" => "Larger than 2MP",
+ "4mp" => "Larger than 4MP",
+ "6mp" => "Larger than 6MP",
+ "8mp" => "Larger than 8MP",
+ "10mp" => "Larger than 10MP",
+ "12mp" => "Larger than 12MP",
+ "15mp" => "Larger than 15MP",
+ "20mp" => "Larger than 20MP",
+ "40mp" => "Larger than 40MP",
+ "70mp" => "Larger than 70MP"
+ ]
+ ],
+ "ratio" => [ // tbs=iar:
+ "display" => "Aspect ratio",
+ "option" => [
+ "any" => "Any ratio",
+ "t" => "Tall",
+ "s" => "Square",
+ "w" => "Wide",
+ "xw" => "Panoramic"
+ ]
+ ],
+ "color" => [ // tbs=ic:
+ "display" => "Color",
+ "option" => [
+ "any" => "Any color",
+ "color" => "Full color",
+ "gray" => "Black & white", // NOTE(review): this label is overwritten by the later "gray" => "Gray" entry (duplicate array key)
+ "trans" => "Transparent",
+ // from there, its ic:specific,isc:
+ "red" => "Red",
+ "orange" => "Orange",
+ "yellow" => "Yellow",
+ "green" => "Green",
+ "teal" => "Teal",
+ "blue" => "Blue",
+ "purple" => "Purple",
+ "pink" => "Pink",
+ "white" => "White",
+ "gray" => "Gray", // NOTE(review): same key as "Black & white" above; one of the two needs a distinct key
+ "black" => "Black",
+ "brown" => "Brown"
+ ]
+ ],
+ "type" => [ // tbs=itp:
+ "display" => "Type",
+ "option" => [
+ "any" => "Any type",
+ "face" => "Faces",
+ "clipart" => "Clip Art",
+ "lineart" => "Line Drawing",
+ "stock" => "Stock",
+ "animated" => "Animated"
+ ]
+ ],
+ "format" => [ // tbs=ift:
+ "display" => "Format",
+ "option" => [
+ "any" => "Any format",
+ "jpg" => "JPG",
+ "gif" => "GIF",
+ "png" => "PNG",
+ "bmp" => "BMP",
+ "svg" => "SVG",
+ "webp" => "WEBP",
+ "ico" => "ICO",
+ "craw" => "RAW"
+ ]
+ ],
+ "rights" => [ // tbs=il:
+ "display" => "Usage rights",
+ "option" => [
+ "any" => "Any license",
+ "cl" => "Creative Commons licenses",
+ "ol" => "Commercial & other licenses"
+ ]
]
]
- ];
+ );
+ break;
+
+ case "videos":
+ return array_merge(
+ $base,
+ [
+ "time" => [
+ "display" => "Time posted",
+ "option" => [ // tbs=qdr
+ "any" => "Any time",
+ "h" => "Past hour",
+ "d" => "Past 24 hours",
+ "w" => "Past week",
+ "m" => "Past month",
+ "y" => "Past year"
+ ]
+ ],
+ "duration" => [
+ "display" => "Duration",
+ "option" => [
+ "any" => "Any duration",
+ "s" => "Short (0-4min)", // tbs=dur:s
+ "m" => "Medium (4-20min)", // tbs=dur:m
+ "l" => "Long (20+ min)" // tbs=dur:l
+ ]
+ ],
+ "quality" => [
+ "display" => "Quality",
+ "option" => [
+ "any" => "Any quality",
+ "h" => "High quality" // tbs=hq:h
+ ]
+ ],
+ "captions" => [
+ "display" => "Captions",
+ "option" => [
+ "any" => "No preference",
+ "yes" => "Closed captioned" // tbs=cc:1
+ ]
+ ]
+ ]
+ );
+ break;
+
+ case "news":
+ return array_merge(
+ $base,
+ [
+ "time" => [
+ "display" => "Time posted",
+ "option" => [ // tbs=qdr
+ "any" => "Any time",
+ "h" => "Past hour",
+ "d" => "Past 24 hours",
+ "w" => "Past week",
+ "m" => "Past month",
+ "y" => "Past year",
+ "a" => "Archives" // tbs=ar:1
+ ]
+ ],
+ "sort" => [
+ "display" => "Sort",
+ "option" => [
+ "relevance" => "Relevance",
+ "date" => "Date" // sbd:1
+ ]
+ ]
+ ]
+ );
break;
}
}
@@ -773,58 +556,402 @@ class google{
curl_close($curlproc);
return $data;
}
- /*
+
+
+
+ /**
+ * Run a Google web search, or fetch the next page of a previous one.
+ *
+ * If $get["npt"] is truthy, the saved request is looked up through
+ * $this->backend->get() and replayed against https://www.google.com;
+ * the "q" value of that request is reused as the search string ("a"
+ * when it is missing). Otherwise a new /search request is built from
+ * the s, country, nsfw, lang, older and newer values of $get.
+ *
+ * @param array $get Request parameters as described above. "older" and
+ * "newer" are either false or a timestamp.
+ * @return array Result of parsepage() for the "web" page type.
+ * @throws Exception "Failed to get HTML" if the HTTP request fails; the
+ * underlying exception is attached as the previous one.
+ */
public function web($get){
- $search = $get["s"];
- $country = $get["country"];
- $nsfw = $get["nsfw"];
- $lang = $get["lang"];
- $older = $get["older"];
- $newer = $get["newer"];
+ if($get["npt"]){
+
+ // pagination: $req is appended to the google.com origin below,
+ // $ip is handed back to $this->get() unchanged
+ [$req, $ip] = $this->backend->get($get["npt"], "web");
+ parse_str(
+ parse_url($req, PHP_URL_QUERY),
+ $search
+ );
+
+ if(isset($search["q"])){
+
+ $search = $search["q"];
+ }else{
+
+ $search = "a"; // lol
+ }
+
+ try{
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com" . $req,
+ []
+ );
+ }catch(Exception $error){
+
+ // keep the root cause reachable through getPrevious()
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $lang = $get["lang"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+ $ip = $this->backend->get_ip();
+
+ $params = [
+ "q" => $search,
+ "num" => 20 // get 20 results
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw ("yes" turns the safe filter off)
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // language
+ if($lang != "any"){
+
+ $params["lr"] = "lang_" . $lang;
+ }
+
+ // &sort=review-date:r:20090301:20090430
+ $older = $older === false ? false : date("Ymd", $older);
+ $newer = $newer === false ? false : date("Ymd", $newer);
+
+ // only "older" given: close the range with today's date
+ if(
+ $older !== false &&
+ $newer === false
+ ){
+
+ $newer = date("Ymd", time());
+ }
+
+ if(
+ $older !== false ||
+ $newer !== false
+ ){
+
+ $params["sort"] = "review-date:r:" . $older . ":" . $newer;
+ }
+
+ try{
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ // keep the root cause reachable through getPrevious()
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
+ }
- $params = [
- "num" => 20 // get 20 results
+ return $this->parsepage($html, "web", $search, $ip);
+ }
+
+
+
+ /**
+ * Run a Google video search (tbm=vid), or fetch the next page of one.
+ *
+ * If $get["npt"] is truthy, the saved request is looked up through
+ * $this->backend->get() and replayed; otherwise a new /search request
+ * is built from the s, country, nsfw, lang, time, duration, quality
+ * and captions values of $get. The page is parsed by parsepage() and
+ * every entry of its "web" list is converted into a video entry.
+ *
+ * @param array $get Request parameters as described above.
+ * @return array Keys: status, npt, video, author, livestream, playlist,
+ * reel. Only "video" is ever filled in here.
+ * @throws Exception "Failed to get HTML" if the HTTP request fails; the
+ * underlying exception is attached as the previous one.
+ */
+ public function video($get){
+
+ if($get["npt"]){
+
+ [$req, $ip] = $this->backend->get($get["npt"], "videos");
+ parse_str(
+ parse_url($req, PHP_URL_QUERY),
+ $search
+ );
+
+ if(isset($search["q"])){
+
+ $search = $search["q"];
+ }else{
+
+ $search = "a"; // lol
+ }
+
+ try{
+
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com" . $req,
+ []
+ );
+ }catch(Exception $error){
+
+ // keep the root cause reachable through getPrevious()
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
+
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $lang = $get["lang"];
+ $time = $get["time"];
+ $duration = $get["duration"];
+ $quality = $get["quality"];
+ $captions = $get["captions"];
+ $ip = $this->backend->get_ip();
+
+ $params = [
+ "q" => $search,
+ "tbm" => "vid",
+ "num" => "20"
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw ("yes" turns the safe filter off)
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // language
+ if($lang != "any"){
+
+ $params["lr"] = "lang_" . $lang;
+ }
+
+ $tbs = [];
+
+ // time
+ if($time != "any"){
+
+ $tbs[] = "qdr:" . $time;
+ }
+
+ // duration
+ if($duration != "any"){
+
+ $tbs[] = "dur:" . $duration;
+ }
+
+ // quality
+ if($quality != "any"){
+
+ $tbs[] = "hq:" . $quality;
+ }
+
+ // captions: the only non-"any" option is "yes", which the filter
+ // table documents as tbs=cc:1 (sending "cc:yes" would not match)
+ if($captions != "any"){
+
+ $tbs[] = "cc:1";
+ }
+
+ // append tbs
+ if(count($tbs) !== 0){
+
+ $params["tbs"] =
+ implode(",", $tbs);
+ }
+
+ try{
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ // keep the root cause reachable through getPrevious()
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
+ }
+
+ $json = $this->parsepage($html, "videos", $search, $ip);
+ $out = [
+ "status" => "ok",
+ "npt" => $json["npt"],
+ "video" => [],
+ "author" => [],
+ "livestream" => [],
+ "playlist" => [],
+ "reel" => []
];
- // country
- if($country != "any"){
+ foreach($json["web"] as $item){
- $params["gl"] = $country;
+ // "Posted" and "Duration" come from the table parsepage() built
+ $out["video"][] = [
+ "title" => $item["title"],
+ "description" => $item["description"],
+ "author" => [
+ "name" => null,
+ "url" => null,
+ "avatar" => null
+ ],
+ "date" => isset($item["table"]["Posted"]) ? strtotime($item["table"]["Posted"]) : null,
+ "duration" => isset($item["table"]["Duration"]) ? $this->hms2int($item["table"]["Duration"]) : null,
+ "views" => null,
+ "thumb" =>
+ $item["thumb"]["url"] === null ?
+ [
+ "url" => null,
+ "ratio" => null
+ ] :
+ [
+ "url" => $item["thumb"]["url"],
+ "ratio" => "16:9"
+ ],
+ "url" => $item["url"]
+ ];
}
- // nsfw
- $params["safe"] = $nsfw == "yes" ? "off" : "active";
+ return $out;
+ }
+
+
+
+ /**
+ * Run a Google news search (tbm=nws), or fetch the next page of one.
+ *
+ * If $get["npt"] is truthy, the saved request is looked up through
+ * $this->backend->get() and replayed; otherwise a new /search request
+ * is built from the s, country, nsfw, lang, time and sort values of
+ * $get. The page is parsed by parsepage() and every entry of its "web"
+ * list is converted into a news entry: the first key of the entry's
+ * table is used as the description and its value as the date string.
+ *
+ * @param array $get Request parameters as described above.
+ * @return array Keys: status, npt, news.
+ * @throws Exception "Failed to get HTML" if the HTTP request fails; the
+ * underlying exception is attached as the previous one.
+ */
+ public function news($get){
- // language
- if($lang != "any"){
+ if($get["npt"]){
- $params["lr"] = "lang_" . $lang;
+ [$req, $ip] = $this->backend->get($get["npt"], "news");
+ parse_str(
+ parse_url($req, PHP_URL_QUERY),
+ $search
+ );
+
+ if(isset($search["q"])){
+
+ $search = $search["q"];
+ }else{
+
+ $search = "a"; // lol
+ }
+
+ try{
+
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com" . $req,
+ []
+ );
+ }catch(Exception $error){
+
+ // keep the root cause reachable through getPrevious()
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
+
+ }else{
+ $search = $get["s"];
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $lang = $get["lang"];
+ $time = $get["time"];
+ $sort = $get["sort"];
+ $ip = $this->backend->get_ip();
+
+ $params = [
+ "q" => $search,
+ "tbm" => "nws",
+ "num" => "20"
+ ];
+
+ // country
+ if($country != "any"){
+
+ $params["gl"] = $country;
+ }
+
+ // nsfw ("yes" turns the safe filter off)
+ $params["safe"] = $nsfw == "yes" ? "off" : "active";
+
+ // language
+ if($lang != "any"){
+
+ $params["lr"] = "lang_" . $lang;
+ }
+
+ $tbs = [];
+
+ // time ("a" selects the archives instead of a qdr: range)
+ if($time != "any"){
+
+ if($time == "a"){
+
+ $tbs[] = "ar:1";
+ }else{
+
+ $tbs[] = "qdr:" . $time;
+ }
+ }
+
+ // relevance
+ if($sort == "date"){
+
+ $tbs[] = "sbd:1";
+ }
+
+ // append tbs
+ if(count($tbs) !== 0){
+
+ $params["tbs"] =
+ implode(",", $tbs);
+ }
+
+ // same failure contract as the pagination branch above
+ try{
+
+ $html =
+ $this->get(
+ $ip,
+ "https://www.google.com/search",
+ $params
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get HTML", 0, $error);
+ }
}
- // &sort=review-date:r:20090301:20090430
- $older = $older === false ? false : date("Ymd", $older);
- $newer = $newer === false ? false : date("Ymd", $newer);
+ $json = $this->parsepage($html, "news", $search, $ip);
+ $out = [
+ "status" => "ok",
+ "npt" => $json["npt"],
+ "news" => []
+ ];
- if(
- $older !== false &&
- $newer === false
- ){
+ foreach($json["web"] as $item){
- $newer = date("Ymd", time());
- }
-
- if(
- $older !== false ||
- $newer !== false
- ){
-
- $params["sort"] = "review-date:r:" . $older . ":" . $newer;
+ // first table key is the description, its value the date text
+ $description = array_key_first($item["table"]);
+
+ if($description !== null){
+
+ $date = $item["table"][$description];
+ }else{
+
+ $date = null;
+ }
+
+ $out["news"][] = [
+ "title" => $item["title"],
+ // not every parsed entry carries an author
+ "author" => $item["author"] ?? null,
+ "description" => $description,
+ // empty table: report no date instead of calling strtotime(null)
+ "date" => $date === null ? null : strtotime($date),
+ "thumb" =>
+ $item["thumb"]["url"] === null ?
+ [
+ "url" => null,
+ "ratio" => null
+ ] :
+ [
+ "url" => $item["thumb"]["url"],
+ "ratio" => "16:9"
+ ],
+ "url" => $item["url"]
+ ];
}
+ return $out;
+ }
+
+
+
+ private function parsepage($html, $pagetype, $search, $ip){
+ /*
$handle = fopen("scraper/google.html", "r");
$html = fread($handle, filesize("scraper/google.html"));
fclose($handle);
+ */
$out = [
"status" => "ok",
@@ -844,6 +971,156 @@ class google{
$this->parsejavascript($html);
+ //
+ // parse accdef's
+ //
+ $has_appended_accdef = false;
+
+ preg_match_all(
+ '/window\.jsl\.dh\(\'(accdef_[0-9]+)\',\'(.*)\'\);/',
+ $html,
+ $accdefs_regex
+ );
+
+ $accdefs = [];
+ for($i=0; $i<count($accdefs_regex[0]); $i++){
+
+ $answer =
+ $this->fuckhtml
+ ->parseJsString(
+ $accdefs_regex[2][$i]
+ );
+
+ $this->fuckhtml->load($answer);
+
+ // get description
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding" => "12px 16px 12px",
+ ],
+ self::is_class
+ ),
+ "div"
+ )[1];
+
+ // get date (rare)
+ $date =
+ $this->fuckhtml
+ ->getElementsByTagName("sub");
+
+ if(count($date) !== 0){
+
+ $description =
+ str_replace(
+ $date[0]["outerHTML"],
+ "",
+ $description["innerHTML"]
+ );
+
+ $date =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $date[0]
+ )
+ );
+ }else{
+
+ $date = null;
+ }
+
+ // get information table
+ $table = [];
+
+ $tbody =
+ $this->fuckhtml
+ ->getElementsByTagName("tbody");
+
+ if(count($tbody) !== 0){
+
+ $this->fuckhtml->load($tbody[0]);
+
+ $trs =
+ $this->fuckhtml
+ ->getElementsByTagName("tr");
+
+ foreach($trs as $tr){
+
+ $this->fuckhtml->load($tr);
+
+ $tds =
+ $this->fuckhtml
+ ->getElementsByTagName("td");
+
+ if(count($tds) === 2){
+
+ $table[
+ $this->fuckhtml
+ ->getTextContent(
+ $tds[0]
+ )
+ ] =
+ $this->fuckhtml
+ ->getTextContent(
+ $tds[1]
+ );
+ }
+ }
+
+ // load back what we had
+ $this->fuckhtml->load($answer);
+ }
+
+ // get title & link
+ $a =
+ $this->fuckhtml
+ ->getElementsByTagName("a")[0];
+
+ $this->fuckhtml->load($a);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByTagName("span");
+
+ if(count($title) === 0){
+
+ continue;
+ }
+
+ $accdefs[] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ ),
+ "description" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $description
+ ),
+ "url" =>
+ $this->unshiturl(
+ $a["attributes"]["href"]
+ ),
+ "date" => $date,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => $table
+ ];
+ }
+
+ $this->fuckhtml->load($html);
+
$containers =
$this->fuckhtml
->getElementsByClassName(
@@ -863,6 +1140,94 @@ class google{
$this->fuckhtml->load($container);
+ // detect spelling
+ $spelling =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "font-size" => "20px",
+ "line-height" => "26px",
+ "padding-top" => "2px",
+ "margin-bottom" => "1px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($spelling) !== 0){
+
+ $a =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ if(count($a) !== 0){
+
+ $scripts =
+ $this->fuckhtml
+ ->getElementsByTagName("script");
+
+ foreach($scripts as $script){
+
+ $container["innerHTML"] =
+ str_replace(
+ $script["outerHTML"],
+ "",
+ $container["innerHTML"]
+ );
+ }
+
+ $container["innerHTML"] =
+ $this->fuckhtml
+ ->getTextContent(
+ str_replace(
+ $a[0]["outerHTML"],
+ "",
+ $container["innerHTML"]
+ )
+ );
+
+ if(
+ preg_match(
+ '/^did you mean/i',
+ $container["innerHTML"]
+ )
+ ){
+
+ $out["spelling"] = [
+ "type" => "not_many",
+ "using" => $search,
+ "correction" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $a[0]
+ )
+ ];
+ }
+
+ elseif(
+ preg_match(
+ '/^showing results for/i',
+ $container["innerHTML"]
+ )
+ ){
+
+ $out["spelling"] = [
+ "type" => "including",
+ "using" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $a[0]
+ ),
+ "correction" => $search
+ ];
+ }
+ }
+
+ continue;
+ }
+
$title =
$this->fuckhtml
->getElementsByClassName(
@@ -891,14 +1256,7 @@ class google{
)
),
"description" => null,
- "url" =>
- $this->decodeurl(
- $this->fuckhtml
- ->getElementsByTagName("a")
- [0]
- ["attributes"]
- ["href"]
- ),
+ "url" => null,
"date" => null,
"type" => "web",
"thumb" => [
@@ -909,76 +1267,198 @@ class google{
"table" => []
];
+ // get link
+ $web["url"] =
+ $this->unshiturl(
+ $this->fuckhtml
+ ->getElementsByTagName("a")
+ [0]
+ ["attributes"]
+ ["href"]
+ );
+
+ //
+ // check if link contains a carousel
+ //
+ $carousels = $this->parsecarousels();
+ if(count($carousels) !== 0){
+
+ $first = true;
+ foreach($carousels as $carousel_cat){
+
+ foreach($carousel_cat as $carousel){
+
+ if($first === true){
+
+ $first = false;
+ }elseif($carousel["image"] !== null){
+
+ $out["image"][] = [
+ "title" => $carousel["title"],
+ "source" => [
+ [
+ "url" => $carousel["image"],
+ "width" => null,
+ "height" => null
+ ]
+ ],
+ "url" => $carousel["url"]
+ ];
+ }
+
+ $web["sublink"][] = [
+ "title" => $carousel["title"],
+ "date" => $carousel["date"],
+ "description" => $carousel["description"],
+ "url" => $carousel["url"]
+ ];
+ }
+ }
+
+ if($carousels[0][0]["image"] !== null){
+ $web["thumb"] = [
+ "url" => $carousels[0][0]["image"],
+ "ratio" => "16:9"
+ ];
+ }
+
+ $out["web"][] = $web;
+ continue;
+ }
+
+ //
+ // no carousel entries, parse as normal link
+ //
+ $this->fuckhtml->load($container);
+
+ // parse URL
+ $web["url"] =
+ $this->unshiturl(
+ $this->fuckhtml
+ ->getElementsByTagName("a")
+ [0]
+ ["attributes"]
+ ["href"]
+ );
+
$container = $container["innerHTML"];
- $description_container =
+ $line_detect =
$this->fuckhtml
->getElementsByClassName(
$this->findstyles(
[
- "padding" => "12px 16px 12px"
+ "height" => "1px",
+ "background-color" => "#dadce0",
+ "margin" => "0 16px"
],
self::is_class
),
"div"
- )[1];
+ );
+
+ if(count($line_detect) !== 0){
+
+ // we found a line, this means we're dealing with a
+ // "featured snippet"
+ $featured = true;
+
+ $description_container =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "white-space" => "pre-line",
+ "word-wrap" => "break-word"
+ ],
+ self::is_class
+ ),
+ "div"
+ )[1];
+
+ // get date node for it
+ $date =
+ $this->fuckhtml
+ ->getElementsByTagName("sub");
+
+ if(count($date) !== 0){
+ $web["date"] =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $date[0]
+ )
+ );
+ }
+ }else{
+
+ // we're dealing with a normal link
+ $featured = false;
+
+ $description_container =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding" => "12px 16px 12px"
+ ],
+ self::is_class
+ ),
+ "div"
+ )[1];
+ }
+
+ //
+ // Get author if we're parsing news
+ //
+ if($pagetype == "news"){
+
+ $author =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "position" => "absolute",
+ "width" => "100%",
+ "top" => "0",
+ "left" => "0",
+ "padding-top" => "1px",
+ "margin-bottom" => "-1px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($author) !== 0){
+
+ $web["author"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $author[0]
+ );
+ }else{
+
+ $web["author"] = null;
+ }
+ }
$description =
$description_container["innerHTML"];
- // get sublinks
$this->fuckhtml->load($description);
- $links =
- $this->fuckhtml
- ->getElementsByTagName("a");
-
- $skip = true;
- foreach($links as $link){
-
- $description =
- str_replace(
- $link["outerHTML"],
- "",
- $description
- );
-
- if($skip){
-
- $skip = false;
- continue;
- }
-
- $sublink = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null
- ];
-
- $sublink["title"] =
- $this->fuckhtml
- ->getTextContent(
- $link
- );
-
- $sublink["url"] =
- $this->decodeurl(
- $link
- ["attributes"]
- ["href"]
- );
-
- $web["sublink"][] = $sublink;
- }
-
+ //
// get thumbnail before we call loadhtml again
+ //
$img =
$this->fuckhtml
->getElementsByTagName("img");
if(count($img) !== 0){
+ $skip = true;
+
if(
isset($img[0]["attributes"]["alt"]) &&
stripos($img[0]["attributes"]["alt"], "Video for") !== false
@@ -996,157 +1476,269 @@ class google{
$this->getimage(
$img[0]["attributes"]["id"]
);
+ }else{
+
+ $skip = false;
}
- // get table elements
- $this->fuckhtml->load($description);
-
- $levels =
+ //
+ // get sublinks
+ //
+ $links =
$this->fuckhtml
- ->getElementsByClassName(
- $this->findstyles(
- [
- "padding-bottom" => "8px"
- ],
- self::is_class
- ),
- "div"
- );
+ ->getElementsByTagName("a");
- $additional_info = [];
- foreach($levels as $level){
+ foreach($links as $link){
- $this->fuckhtml->load($level);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $is_rating = -2;
-
- foreach($spans as $span){
+ if($skip === true){
- // clean up description
- $description =
- str_replace(
- $span["outerHTML"],
- "",
- $description
- );
-
- $innertext =
- $this->fuckhtml
- ->getTextContent(
- $span
- );
-
- if($innertext == ""){ continue; }
-
- if(
- strtolower($innertext)
- == "rating"
- ){
-
- $is_rating = -1;
- continue;
- }
-
- //
- // Parse rating object
- //
-
- if($is_rating >= -1){
-
- if($span["level"] !== 1){ continue; }
-
- $is_rating++;
-
- // 10/10 (123)
- if($is_rating === 0){
-
- $innertext = explode(" ", $innertext, 2);
-
- $web["table"]["Rating"] = $innertext[0];
- $web["table"]["Hits"] =
- trim(
- str_replace(
- [
- "(",
- ")"
- ],
- "",
- $innertext[1]
- )
- );
- continue;
- }
-
- // US$4.99
- // MYR 50.00
- // $38.34
- // JP¥6,480
- if($is_rating === 2){
-
- $web["table"]["Price"] = $innertext;
- continue;
- }
-
- // Android / In stock
- if($is_rating === 4){
-
- $web["table"]["Support"] = $innertext;
- continue;
- }
-
- // ignore the rest
- continue;
- }
-
- //
- // Parse standalone text
- //
- $additional_info[] = $innertext;
+ $skip = false;
+ continue;
}
- }
-
- for($i=0; $ifuckhtml->load($description);
-
- // get date node
- $span =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(count($span) !== 0){
$description =
str_replace(
- $span[0]["outerHTML"],
+ $link["outerHTML"],
"",
$description
);
- $span =
- strtotime(
+ $sublink = [
+ "title" => null,
+ "description" => null,
+ "url" => null,
+ "date" => null
+ ];
+
+ $sublink["title"] =
+ $this->titledots(
$this->fuckhtml
->getTextContent(
- $span[0]
+ $link
)
);
- if($span){
+ $sublink["url"] =
+ $this->unshiturl(
+ $link
+ ["attributes"]
+ ["href"]
+ );
+
+ if(parse_url($sublink["url"], PHP_URL_HOST) !== null){
- $web["date"] = $span;
+ $web["sublink"][] = $sublink;
+ }
+ }
+
+ //
+ // Parse spans in description
+ //
+ $this->fuckhtml->load($description);
+
+ if($featured === false){
+
+ $levels =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding-bottom" => "8px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ // oh my god yes, fucking great, sometimes there are NO levels
+ // hahahahahhahahahahahahahahahhahaa
+ if(count($levels) === 0){
+
+ $levels = [$description];
+ }
+
+ foreach($levels as $level){
+
+ $this->fuckhtml->load($level);
+
+ $spans =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ $is_rating = -1;
+
+ foreach($spans as $span){
+
+ $innertext =
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $span
+ ),
+ " ·."
+ );
+
+ if($innertext == ""){ continue; }
+
+ if(
+ strtolower($innertext)
+ == "rating"
+ ){
+
+ $is_rating = 0;
+
+ // clean up before we go
+ $description =
+ str_replace(
+ $span["outerHTML"],
+ "",
+ $description
+ );
+ continue;
+ }
+
+ //
+ // Parse rating object
+ //
+ if($is_rating >= 0){
+
+ // clean up description
+ $description =
+ str_replace(
+ $span["outerHTML"],
+ "",
+ $description
+ );
+
+ if($span["level"] !== 1){ continue; }
+ $is_rating++;
+
+ // 10/10 (123)
+ if($is_rating === 1){
+
+ $innertext = explode(" ", $innertext, 2);
+
+ $web["table"]["Rating"] = $innertext[0];
+
+ if(count($innertext) === 2){
+ $web["table"]["Hits"] =
+ trim(
+ str_replace(
+ [
+ "(",
+ ")"
+ ],
+ "",
+ $innertext[1]
+ )
+ );
+
+ if($web["table"]["Hits"] == ""){
+
+ unset($web["table"]["Hits"]);
+ }
+ }
+ continue;
+ }
+
+ // US$4.99
+ // MYR 50.00
+ // $38.34
+ // JP¥6,480
+ // Reviewed by your mom
+ if($is_rating === 2){
+
+ if(
+ preg_match(
+ '/^Review by (.+)/',
+ $innertext,
+ $match
+ )
+ ){
+
+ $web["table"]["Author"] = $match[1];
+ continue;
+ }
+
+ $web["table"]["Price"] = $innertext;
+ continue;
+ }
+
+ // Android / In stock
+ if($is_rating === 3){
+
+ $web["table"]["Support"] = $innertext;
+ continue;
+ }
+
+ // ignore the rest
+ continue;
+ }
+
+ //
+ // Parse standalone text
+ //
+
+ // If we reach this point:
+ // 1. Ratings have been parsed
+ // 2. We're parsing a WEB link, not some shitty piece of shit
+
+ // check for date
+ // if span has no text before it, assume it's a date
+ $desc_split =
+ explode(
+ $span["outerHTML"],
+ $description,
+ 2
+ );
+
+ if(
+ $this->fuckhtml
+ ->getTextContent(
+ $desc_split[0]
+ ) == ""
+ ){
+
+ // has no text before
+ $date = strtotime($innertext);
+ if($date){
+
+ $web["date"] = $date;
+ }
+
+ // cleanup
+ $description =
+ str_replace(
+ $span["outerHTML"],
+ "",
+ $description
+ );
+
+ continue;
+ }
+
+ // Ready to parse table
+ if(count($desc_split) === 2){
+ $this->fuckhtml->load($desc_split[1]);
+
+ $web["table"][
+ $this->fuckhtml
+ ->getTextContent(
+ trim($desc_split[0], ": ")
+ )
+ ] = $innertext;
+
+ // cleanup
+ $description =
+ str_replace(
+ $desc_split[0] . $span["outerHTML"],
+ "",
+ $description
+ );
+ }
+ }
}
}
@@ -1159,12 +1751,559 @@ class google{
" ·."
);
+ if($web["description"] == ""){
+
+ $web["description"] = null;
+ }
+
$out["web"][] = $web;
continue;
}
- // check for container title header
+ //
+ // Detect wikipedia shit
+ //
+ $wiki_title =
+ $this->fuckhtml
+ ->getElementsByTagName("h3");
+
+ if(count($wiki_title) !== 0){
+
+ $description_after = [];
+ $description = [];
+ $table = [];
+ $sublink = [];
+
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($as as $a){
+
+ if(
+ isset($a["attributes"]["href"]) &&
+ parse_url($a["attributes"]["href"], PHP_URL_HOST) == "maps.google.com"
+ ){
+
+ // detected maps embed, ignore
+ continue 2;
+ }
+ }
+
+ // get carousels and remove them from container for image grepper
+ $carousels = $this->parsecarousels($container["innerHTML"]);
+ $this->fuckhtml->load($container);
+
+ // add images to image tab, if applicable
+ for($i=0; $i $item["title"],
+ "source" => [
+ [
+ "url" => $item["url"],
+ "width" => $item["image_width"],
+ "height" => $item["image_height"]
+ ],
+ [
+ "url" => $item["image"],
+ "width" => $item["thumb_width"],
+ "height" => $item["thumb_height"]
+ ]
+ ],
+ "url" => $item["ref"]
+ ];
+
+ unset($carousels[$i]);
+ }
+ }
+ }
+
+ $carousels = array_values($carousels);
+
+ // interpret remaining carousels as title + carousel
+ $titles =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "font-weight" => "700",
+ "letter-spacing" => "0.75px",
+ "text-transform" => "uppercase"
+ ],
+ self::is_class
+ )
+ );
+
+ for($i=0; $i "title",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $titles[$i]
+ )
+ ];
+
+ foreach($carousels[$i] as $carousel){
+
+ $description_after[] = [
+ "type" => "link",
+ "url" => "web?s=" . urlencode($carousel["description"]) . "&scraper=google",
+ "value" => $carousel["description"]
+ ];
+
+ if($carousel["subtext"] !== null){
+
+ $description_after[] = [
+ "type" => "quote",
+ "value" => $carousel["subtext"]
+ ];
+ }
+
+ $description_after[] = [
+ "type" => "image",
+ "url" => $carousel["image"]
+ ];
+ }
+ }
+
+ $categories =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding" => "12px 16px 12px"
+ ],
+ self::is_class
+ )
+ );
+
+ $image =
+ $this->fuckhtml
+ ->getElementsByTagName("img");
+
+ if(count($image) !== 0){
+
+ $image = $this->getimage($image[0]["attributes"]["id"]);
+ }else{
+
+ $image = null;
+ }
+
+ $url = null;
+
+ for($i=0; $i<count($categories); $i++){
+
+ $this->fuckhtml->load($categories[$i]);
+
+ if($i === 0){
+ // first node. this should be the header with the small
+ // information snippet
+
+ $url =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ if(count($url) !== 0){
+
+ $url =
+ $this->unshiturl(
+ $url[0]["attributes"]["href"]
+ );
+
+ if(parse_url($url, PHP_URL_HOST) == "encrypted-tbn0.gstatic.com"){
+
+ $image = $url;
+ $url = null;
+ }
+ }else{
+
+ $url = null;
+ }
+
+ $categories[$i]["innerHTML"] =
+ str_replace(
+ $wiki_title[0]["outerHTML"],
+ "",
+ $categories[$i]["innerHTML"]
+ );
+
+ $subtext =
+ $this->fuckhtml
+ ->getTextContent(
+ $categories[$i]["innerHTML"]
+ );
+
+ if(strlen($subtext) !== 0){
+
+ $description[] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $categories[$i]["innerHTML"]
+ )
+ ];
+ }
+
+ // detect audio file
+ $audio =
+ $this->fuckhtml
+ ->getElementsByTagName("audio");
+
+ if(count($audio) !== 0){
+
+ $description[] = [
+ "type" => "audio",
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $audio[0]["attributes"]["src"]
+ )
+ ];
+ }
+ }else{
+
+ // check for separator elements IN THERE
+ $separators =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "white-space" => "pre-line",
+ "word-wrap" => "break-word"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ // detect container type
+ foreach($separators as $separator){
+
+ $this->fuckhtml->load($separator);
+
+ // ignore wrong levels
+ if($separator["level"] !== 2){
+
+ continue;
+ }
+
+ //
+ // Detect word definition
+ //
+ $wordwraps =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding-bottom" => "12px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($wordwraps) !== 0){
+
+ foreach($wordwraps as $word){
+
+ $this->fuckhtml->load($word);
+
+ // detect title
+ $span =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ if(
+ count($span) === 1 &&
+ $this->fuckhtml
+ ->getTextContent(
+ str_replace(
+ $span[0]["outerHTML"],
+ "",
+ $word["innerHTML"]
+ )
+ ) == ""
+ ){
+
+ $description[] = [
+ "type" => "title",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $span[0]
+ )
+ ];
+ continue;
+ }
+
+ // detect list element
+ $lists =
+ $this->fuckhtml
+ ->getElementsByTagName("ol");
+
+ if(count($lists) !== 0){
+ foreach($lists as $list){
+
+ $this->fuckhtml->load($list);
+
+ $items =
+ $this->fuckhtml
+ ->getElementsByTagName("li");
+
+ $w = 0;
+ foreach($items as $item){
+
+ $w++;
+ $this->fuckhtml->load($item);
+
+ // get subnodes
+ $subnodes =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "white-space" => "pre-line",
+ "word-wrap" => "break-word"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ foreach($subnodes as $subnode){
+
+ $this->fuckhtml->load($subnode);
+
+ $spans =
+ $this->fuckhtml
+ ->getElementsByTagName("span");
+
+ if(count($spans) !== 0){
+
+ // append quote
+ $description[] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $subnode
+ )
+ ];
+ }else{
+
+ // append text
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ $w . ". " .
+ $this->fuckhtml
+ ->getTextContent(
+ $subnode
+ )
+ ];
+ }
+ }
+ }
+ }
+ }else{
+
+ // parse without list
+ // get subnodes
+ $subnodes =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "white-space" => "pre-line",
+ "word-wrap" => "break-word"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ foreach($subnodes as $subnode){
+
+ $this->fuckhtml->load($subnode);
+
+ $spans =
+ $this->fuckhtml
+ ->getElementsByTagName("span");
+
+ if(count($spans) !== 0){
+
+ // append quote
+ $description[] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $subnode
+ )
+ ];
+ }else{
+
+ // append text
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $subnode
+ )
+ ];
+ }
+ }
+ }
+ }
+ }else{
+
+ //
+ // Parse table
+ //
+ $spans =
+ $this->fuckhtml
+ ->getElementsByTagName("span");
+
+ foreach($spans as $span){
+
+ if(!isset($span["attributes"]["class"])){
+
+ // found table
+ $row =
+ explode(
+ ":",
+ $this->fuckhtml
+ ->getTextContent(
+ $separator
+ ),
+ 2
+ );
+
+ if(count($row) === 2){
+
+ $table[rtrim($row[0])] =
+ ltrim($row[1]);
+
+ }
+ continue 2;
+ }
+ }
+
+ //
+ // Parse normal description
+ //
+ $links_rem =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($links_rem as $rem){
+
+ $separator["innerHTML"] =
+ str_replace(
+ $rem["outerHTML"],
+ "",
+ $separator["innerHTML"]
+ );
+ }
+
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ rtrim(
+ $this->fuckhtml
+ ->getTextContent(
+ $separator
+ ),
+ " .,"
+ )
+ ];
+ }
+ }
+ }
+
+ // detect huge buttons
+ $buttons =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "display" => "table-cell",
+ "vertical-align" => "middle",
+ "height" => "52px",
+ "text-align" => "center"
+ ],
+ self::is_class
+ ),
+ "a"
+ );
+
+ if(count($buttons) !== 0){
+
+ foreach($buttons as $button){
+
+ if(isset($button["attributes"]["href"])){
+
+ $sublink[
+ $this->fuckhtml
+ ->getTextContent(
+ $button
+ )
+ ] =
+ $this->unshiturl(
+ $button["attributes"]["href"]
+ );
+ }
+ }
+ }
+ }
+
+ // append description_after (contains carousel info)
+ $description = array_merge(
+ $description,
+ $description_after
+ );
+
+ $out["answer"][] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $wiki_title[0]
+ ),
+ "description" => $description,
+ "url" => $url,
+ "thumb" => $image,
+ "table" => $table,
+ "sublink" => $sublink
+ ];
+
+ continue;
+ }
+
+ //
+ // Detect related searches containers
+ //
$container_title =
$this->fuckhtml
->getElementsByClassName(
@@ -1183,6 +2322,21 @@ class google{
if(count($container_title) !== 0){
+ // get carousel entries
+ $carousels = $this->parsecarousels($container["innerHTML"]);
+ $this->fuckhtml->load($container);
+
+ foreach($carousels as $carousel){
+
+ foreach($carousel as $item){
+
+ if($item["url"] !== null){
+
+ $out["related"][] = $item["url"];
+ }
+ }
+ }
+
$container_title =
strtolower(
$this->fuckhtml
@@ -1191,23 +2345,151 @@ class google{
)
);
+ switch($container_title){
+
+ case "related searches":
+ case "people also search for":
+ //
+ // Parse related searches
+ //
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($as as $a){
+
+ $out["related"][] =
+ $this->fuckhtml
+ ->getTextContent($a);
+ }
+ break;
+
+ case "people also ask":
+ // get related queries
+ $divs =
+ $this->fuckhtml
+ ->getElementsByTagName("div");
+
+ foreach($divs as $div){
+
+ // append accdefs to the web results (only once)
+ if($has_appended_accdef === false){
+
+ $out["web"] = array_merge($out["web"], $accdefs);
+ $has_appended_accdef = true;
+ }
+
+ // add accdef's questions
+ if(isset($div["attributes"]["role"])){
+
+ $out["related"][] =
+ $this->fuckhtml
+ ->getTextContent($div);
+
+ continue;
+ }
+ }
+ break;
+ }
+
+ continue;
+ }
+
+ //
+ // Parse news
+ //
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "font-size" => "16px",
+ "line-height" => "20px",
+ "font-weight" => "400"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($title) !== 0){
+
+ $carousels = $this->parsecarousels();
+ $this->fuckhtml->load($container);
+
+ if(count($carousels) === 0){
+
+ // no carousels found
+ continue;
+ }
+
+ $title =
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ );
+
if(
- $container_title == "related searches" ||
- $container_title == "people also search for"
+ preg_match(
+ '/^latest from|^top stories/',
+ $title
+ )
){
- //
- // Parse related searches
- //
- $as =
- $this->fuckhtml
- ->getElementsByTagName("a");
-
- foreach($as as $a){
+ // Found news article
+ foreach($carousels[0] as $carousel){
- $out["related"][] =
- $this->fuckhtml
- ->getTextContent($a);
+ if($carousel["image"] !== null){
+
+ $thumb = [
+ "url" => $carousel["image"],
+ "ratio" => "16:9"
+ ];
+ }else{
+
+ $thumb = [
+ "url" => null,
+ "ratio" => null
+ ];
+ }
+
+ $out["news"][] = [
+ "title" => $carousel["title"],
+ "description" => $carousel["description"],
+ "date" => $carousel["date"],
+ "thumb" => $thumb,
+ "url" => $carousel["url"]
+ ];
+ }
+ }
+
+ elseif(
+ $title == "images"
+ ){
+
+ foreach($carousels as $carousel){
+
+ foreach($carousel as $item){
+
+ $out["image"][] = [
+ "title" => $item["title"],
+ "source" => [
+ [
+ "url" => $item["url"],
+ "width" => $item["image_width"],
+ "height" => $item["image_height"]
+ ],
+ [
+ "url" => $item["image"],
+ "width" => $item["thumb_width"],
+ "height" => $item["thumb_height"]
+ ]
+ ],
+ "url" => $item["ref"]
+ ];
+ }
}
}
@@ -1215,134 +2497,148 @@ class google{
}
//
- // Parse image carousel
+ // Detect nodes with only text + links
//
- $title_container =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->findstyles(
- [
- "padding" => "12px 16px 12px"
- ],
- self::is_class
- ),
- "div"
- );
- if(count($title_container) !== 0){
+ // ignore elements with