mirror of
https://git.lolcat.ca/lolcat/4get.git
synced 2024-11-14 03:49:48 +01:00
added faceberg videos
This commit is contained in:
parent
bcc6ce58b3
commit
2913c58cec
28
README.md
28
README.md
|
@ -7,7 +7,35 @@ https://4get.ca/about
|
||||||
## Try it out
|
## Try it out
|
||||||
https://4get.ca
|
https://4get.ca
|
||||||
|
|
||||||
|
## Supported websites
|
||||||
|
1. Web
|
||||||
|
- DuckDuckGo
|
||||||
|
- Brave
|
||||||
|
- Mojeek
|
||||||
|
- Marginalia
|
||||||
|
- wiby
|
||||||
|
|
||||||
|
2. Images
|
||||||
|
- DuckDuckGo
|
||||||
|
- Yandex
|
||||||
|
- Brave
|
||||||
|
|
||||||
|
3. Videos
|
||||||
|
- YouTube
|
||||||
|
- Facebook videos
|
||||||
|
- DuckDuckGo
|
||||||
|
- Brave
|
||||||
|
|
||||||
|
4. News
|
||||||
|
- DuckDuckGo
|
||||||
|
- Brave
|
||||||
|
- Mojeek
|
||||||
|
|
||||||
|
More scrapers are coming soon. I currently want to add Google, Hacker News and Qwant, and find a way to scrape Yandex web without those fucking captchas. Shopping, music and files tabs are also on my to-do list.
|
||||||
|
|
||||||
# Setup
|
# Setup
|
||||||
|
This section is still to-do. You will need to figure shit out for some of the apache2 stuff. Everything else should be OK.
|
||||||
|
|
||||||
Login as root.
|
Login as root.
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|
11
about.php
11
about.php
|
@ -26,7 +26,9 @@ $left =
|
||||||
To set this as your default search engine on Firefox, right click the URL bar and select <div class="code-inline">Add "4get"</div>. Then, visit <a href="about:preferences#search" target="_BLANK" class="link">about:preferences#search</a> and select <div class="code-inline">4get</div> in the dropdown menu.
|
To set this as your default search engine on Firefox, right click the URL bar and select <div class="code-inline">Add "4get"</div>. Then, visit <a href="about:preferences#search" target="_BLANK" class="link">about:preferences#search</a> and select <div class="code-inline">4get</div> in the dropdown menu.
|
||||||
|
|
||||||
<a href="#chrome"><h2 id="chrome">On Chromium and Blink based browsers</h2></a>
|
<a href="#chrome"><h2 id="chrome">On Chromium and Blink based browsers</h2></a>
|
||||||
Right click the URL bar and click <div class="code-inline">Manage search engines and site search</div>, or visit <a href="chrome://settings/searchEngines" target="_BLANK" class="link">chrome://settings/searchEngines</a>. Then, create a new entry under <div class="code-inline">Search engines</div> and fill in the following details:
|
Click the 3 superpositioned dots at the top right of the screen and click on <div class="code-inline">Settings</div>, then search for <div class="code-inline">default search engine</div>, or visit <a href="chrome://settings/searchEngines">chrome://settings/searchEngines</a>.<br><br>
|
||||||
|
|
||||||
|
Once you\'re there, click the pencil on the last entry under "Search engines" (it\'s probably DuckDuckGo). Once you do that, a popup will appear. Populate it with the following information:
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
|
@ -39,19 +41,16 @@ $left =
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Shortcut</td>
|
<td>Shortcut</td>
|
||||||
<td>4get.ca</td>
|
<td>4get</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>URL with %s in place of query</td>
|
<td>URL with %s in place of query</td>
|
||||||
<td>https://4get.ca/web?q=%s</td>
|
<td>https://4get.ca/web?s=%s</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
Once that\'s done, click <div class="code-inline">Save</div>. Then, on the right handside of the newly created entry, open the dropdown menu and select <div class="code-inline">Make default</div>.
|
Once that\'s done, click <div class="code-inline">Save</div>. Then, on the right handside of the newly created entry, open the dropdown menu and select <div class="code-inline">Make default</div>.
|
||||||
|
|
||||||
<a href="#other-browsers"><h2 id="other-browsers">Other browsers</h2></a>
|
|
||||||
Get a real browser.
|
|
||||||
|
|
||||||
<h1>Frequently asked questions</h1>
|
<h1>Frequently asked questions</h1>
|
||||||
<a href="#what-is-this"><h2 id="what-is-this">What is this?</h2></a>
|
<a href="#what-is-this"><h2 id="what-is-this">What is this?</h2></a>
|
||||||
This is a metasearch engine that gets results from other engines, and strips away all of the tracking parameters and Microsoft/globohomo bullshit they add. Most of the other alternatives to Google jack themselves off about being ""privacy respecting"" or whatever the fuck but it always turns out to be a total lie, and I just got fed up with their shit honestly. Alternatives like Searx or YaCy all fucking sucks so I made my own thing.
|
This is a metasearch engine that gets results from other engines, and strips away all of the tracking parameters and Microsoft/globohomo bullshit they add. Most of the other alternatives to Google jack themselves off about being ""privacy respecting"" or whatever the fuck but it always turns out to be a total lie, and I just got fed up with their shit honestly. Alternatives like Searx or YaCy all fucking sucks so I made my own thing.
|
||||||
|
|
|
@ -169,7 +169,7 @@ class frontend{
|
||||||
}
|
}
|
||||||
|
|
||||||
$payload .=
|
$payload .=
|
||||||
htmlspecialchars($site["title"]) .
|
$this->highlighttext($keywords, $site["title"]) .
|
||||||
'</div>';
|
'</div>';
|
||||||
|
|
||||||
if($greentext !== null){
|
if($greentext !== null){
|
||||||
|
@ -903,6 +903,7 @@ class frontend{
|
||||||
"display" => "Scraper",
|
"display" => "Scraper",
|
||||||
"option" => [
|
"option" => [
|
||||||
"yt" => "YouTube",
|
"yt" => "YouTube",
|
||||||
|
"fb" => "Facebook videos",
|
||||||
"ddg" => "DuckDuckGo",
|
"ddg" => "DuckDuckGo",
|
||||||
"brave" => "Brave"//,
|
"brave" => "Brave"//,
|
||||||
//"google" => "Google"
|
//"google" => "Google"
|
||||||
|
@ -972,6 +973,11 @@ class frontend{
|
||||||
$lib = new google();
|
$lib = new google();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "fb":
|
||||||
|
include "scraper/facebook.php";
|
||||||
|
$lib = new facebook();
|
||||||
|
break;
|
||||||
|
|
||||||
case "mojeek":
|
case "mojeek":
|
||||||
include "scraper/mojeek.php";
|
include "scraper/mojeek.php";
|
||||||
$lib = new mojeek();
|
$lib = new mojeek();
|
||||||
|
@ -1269,6 +1275,14 @@ class frontend{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(
|
||||||
|
$key == "older" ||
|
||||||
|
$key == "newer"
|
||||||
|
){
|
||||||
|
|
||||||
|
$value = date("Y-m-d", (int)$value);
|
||||||
|
}
|
||||||
|
|
||||||
$out[$key] = $value;
|
$out[$key] = $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,4 @@
|
||||||
<?php
|
<?php
|
||||||
/*
|
|
||||||
$brave = new brave();
|
|
||||||
|
|
||||||
$handle = fopen("captcha.html", "r");
|
|
||||||
$html = fread($handle, filesize("captcha.html"));
|
|
||||||
fclose($handle);
|
|
||||||
|
|
||||||
$brave->bypasscaptcha($html, "yes", "ca");*/
|
|
||||||
|
|
||||||
class brave{
|
class brave{
|
||||||
|
|
||||||
|
@ -154,6 +146,11 @@ class brave{
|
||||||
case "no": $nsfw = "strict"; break;
|
case "no": $nsfw = "strict"; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if($country == "any"){
|
||||||
|
|
||||||
|
$country = "all";
|
||||||
|
}
|
||||||
|
|
||||||
$headers = [
|
$headers = [
|
||||||
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
|
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
|
@ -169,11 +166,6 @@ class brave{
|
||||||
"Sec-Fetch-User: ?1"
|
"Sec-Fetch-User: ?1"
|
||||||
];
|
];
|
||||||
|
|
||||||
if($country == "any"){
|
|
||||||
|
|
||||||
$country = "all";
|
|
||||||
}
|
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
if($get !== []){
|
if($get !== []){
|
||||||
|
@ -1990,6 +1982,8 @@ class brave{
|
||||||
as $result
|
as $result
|
||||||
){
|
){
|
||||||
|
|
||||||
|
print_r($result);
|
||||||
|
|
||||||
$out["image"][] = [
|
$out["image"][] = [
|
||||||
"title" => $result["title"],
|
"title" => $result["title"],
|
||||||
"source" => [
|
"source" => [
|
||||||
|
|
809
scraper/facebook.php
Normal file
809
scraper/facebook.php
Normal file
|
@ -0,0 +1,809 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class facebook{
|
||||||
|
|
||||||
|
const get = 0;
|
||||||
|
const post = 1;
|
||||||
|
|
||||||
|
/**
 * Set up the scraper.
 *
 * Loads the next-page helper library and stores a helper instance
 * (constructed with the "fb" identifier) on $this->nextpage, where
 * video() and video_nextpage() use it to fetch and store page tokens.
 */
public function __construct(){
	
	// include_once instead of include: lib/nextpage.php presumably
	// declares the nextpage class, and including it a second time in
	// the same request would abort with a redeclaration error.
	include_once "lib/nextpage.php";
	
	$this->nextpage = new nextpage("fb");
}
|
||||||
|
|
||||||
|
/**
 * Describe the search filters this scraper offers.
 *
 * Each entry maps a filter key to a "display" label and an "option"
 * value: either a key => label list of choices, or the string "_DATE"
 * for the two date filters.
 *
 * @param mixed $page  Not read by this method.
 * @return array       Filter definitions keyed by "sort", "newer",
 *                     "older" and "live", in that order.
 */
public function getfilters($page){
	
	$date_option = "_DATE";
	
	$filters = [];
	
	$filters["sort"] = [
		"display" => "Sort by",
		"option" => [
			"relevance" => "Relevance",
			"most_recent" => "Most recent"
		]
	];
	
	$filters["newer"] = [
		"display" => "Newer than",
		"option" => $date_option
	];
	
	$filters["older"] = [
		"display" => "Older than",
		"option" => $date_option
	];
	
	$filters["live"] = [
		"display" => "Livestream",
		"option" => [
			"no" => "No",
			"yes" => "Yes"
		]
	];
	
	return $filters;
}
|
||||||
|
|
||||||
|
/**
 * Perform an HTTP request and return the raw response body.
 *
 * @param string $url      Endpoint to request.
 * @param array  $get      Request parameters. For self::get they are
 *                         appended to $url as a query string; for any
 *                         other $reqtype they are sent as a urlencoded
 *                         form body.
 * @param int    $reqtype  self::get (default) or self::post.
 * @return mixed           The value returned by curl_exec() (the body,
 *                         since CURLOPT_RETURNTRANSFER is enabled).
 * @throws Exception       With the cURL error text when the transfer
 *                         reports an error.
 */
private function get($url, $get = [], $reqtype = self::get){
	
	$curlproc = curl_init();
	
	// Encode once up front. The headers are built even when there are
	// no parameters, so CURLOPT_HTTPHEADER never receives an undefined
	// variable.
	$query = http_build_query($get);
	
	if($reqtype === self::get){
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		if($get !== []){
			
			$url .= "?" . $query;
		}
	}else{
		
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
			"Accept: */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br",
			"Content-Type: application/x-www-form-urlencoded",
			"X-FB-Friendly-Name: SearchCometResultsPaginatedResultsQuery",
			//"X-FB-LSD: AVptQC4a16c",
			//"X-ASBD-ID: 129477",
			"Content-Length: " . strlen($query),
			"Origin: https://www.facebook.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Referer: https://www.facebook.com/watch/",
			// NOTE(review): hard-coded session cookie
			"Cookie: datr=__GMZCgwVF5BbyvAtfJojQwg; oo=v1%7C3%3A1691641171; wd=955x995",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers"
		];
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $query);
	}
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message before releasing the handle so it is not
		// leaked on the error path
		$error = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($error);
	}
	
	curl_close($curlproc);
	return $data;
}
|
||||||
|
|
||||||
|
/**
 * Search Facebook Watch for videos.
 *
 * When $get["npt"] is set, the stored next-page request is loaded and
 * replayed through video_nextpage(). Otherwise the search page is
 * fetched, the embedded JSON result list is parsed, and - if Facebook
 * reports another page - a GraphQL pagination request is assembled
 * from tokens found in the HTML and handed to video_nextpage(), which
 * fetches that page immediately and stores a token for the one after.
 *
 * @param array $get  Request parameters. Keys read: "s" (search
 *                    text), "npt" (next page token), "sort", "live",
 *                    "older", "newer". "older"/"newer" are compared
 *                    against false for "unset" and are passed to
 *                    date(), so presumably unix timestamps - confirm
 *                    against the caller.
 * @return array      $this->out: "status", "npt" and the result lists
 *                    "video", "author", "livestream", "playlist",
 *                    "reel".
 * @throws Exception  When the result JSON, the pagination bootstrap
 *                    data or the AJAX parameters cannot be extracted,
 *                    or when an HTTP request fails.
 */
public function video($get){
	
	$search = $get["s"];
	$npt = $get["npt"];
	
	$this->out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	if($npt){
		
		$nextpage =
			json_decode(
				$this->nextpage->get(
					$npt,
					"videos"
				),
				true
			);
		
		// parse next page
		$this->video_nextpage($nextpage);
		
		return $this->out;
	}
	
	// generate filter data
	// Each filter is a JSON object {"name": ..., "args": ...} encoded
	// as a string and keyed "<name>:0"; for creation_time the "args"
	// value is itself a JSON-encoded string.
	$filter = [];
	$sort = $get["sort"];
	$live = $get["live"];
	$older = $get["older"];
	$newer = $get["newer"];
	
	if(
		$older !== false ||
		$newer !== false
	){
		
		// an open-ended range runs from the epoch up to now
		if($older === false){
			
			$older = time();
		}
		
		if($newer === false){
			
			$newer = 0;
		}
		
		// date() requires an integer timestamp
		$older = (int)$older;
		$newer = (int)$newer;
		
		$filter["rp_creation_time:0"] =
			json_encode(
				[
					"name" => "creation_time",
					"args" =>
						json_encode(
							[
								"start_year" => date("Y", $newer),
								"start_month" => date("Y-m", $newer),
								"end_year" => date("Y", $older),
								"end_month" => date("Y-m", $older),
								"start_day" => date("Y-m-d", $newer),
								"end_day" => date("Y-m-d", $older)
							]
						)
				]
			);
	}
	
	if($sort != "relevance"){
		
		$filter["videos_sort_by:0"] =
			json_encode(
				[
					"name" => "videos_sort_by",
					"args" => "Most Recent"
				]
			);
	}
	
	if($live != "no"){
		
		$filter["videos_live:0"] =
			json_encode(
				[
					"name" => "videos_live",
					"args" => ""
				]
			);
	}
	
	$req = [
		"q" => $search
	];
	
	if(count($filter) !== 0){
		
		$req["filters"] =
			base64_encode(
				json_encode(
					$filter
				)
			);
	}
	
	$html =
		$this->get(
			"https://www.facebook.com/watch/search/",
			$req
		);
	
	preg_match_all(
		'/({"__bbox":.*,"sequence_number":0}})\]\]/',
		$html,
		$json
	);
	
	// the result list is the second "__bbox" blob on the page
	if(!isset($json[1][1])){
		
		throw new Exception("Could not grep JSON body");
	}
	
	$json = json_decode($json[1][1], true);
	
	if(!is_array($json)){
		
		throw new Exception("Could not decode JSON body");
	}
	
	$results =
		isset($json["__bbox"]["result"]["data"]["serpResponse"]["results"]) ?
		$json["__bbox"]["result"]["data"]["serpResponse"]["results"] :
		[];
	
	$edges =
		(isset($results["edges"]) && is_array($results["edges"])) ?
		$results["edges"] :
		[];
	
	foreach($edges as $result){
		
		$this->parse_edge($result);
	}
	
	// get nextpage data
	if(
		isset($results["page_info"]["has_next_page"]) &&
		$results["page_info"]["has_next_page"] == 1
	){
		
		$found =
			preg_match(
				'/handleWithCustomApplyEach\(ScheduledApplyEach,({.*})\);}\);}\);<\/script>/',
				$html,
				$nextpagedata
			);
		
		if(
			$found !== 1 ||
			!is_array($nextpagedata = json_decode($nextpagedata[1], true))
		){
			
			throw new Exception("Could not grep the next page data");
		}
		
		// [POST] https://www.facebook.com/api/graphql/
		// FORM data, not JSON!
		
		$nextpage = [
			"av" => "0",
			"__user" => null,
			"__a" => null,
			"__req" => "2",
			"__hs" => null,
			"dpr" => "1",
			"__ccg" => null,
			"__rev" => null,
			// another client side token
			"__s" => $this->randomstring(6) . ":" . $this->randomstring(6) . ":" . $this->randomstring(6),
			"__hsi" => null,
			// tracking fingerprint (probably generated using webgl)
			"__dyn" => "7xeUmwlE7ibwKBWo2vwAxu13w8CewSwMwNw9G2S0im3y4o0B-q1ew65xO2O1Vw8G1Qw5Mx61vw9m1YwBgao6C0Mo5W3S7Udo5q4U2zxe2Gew9O222SUbEaU2eU5O0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w",
			"__csr" => $this->randomstring(null),
			"__comet_req" => null,
			"lsd" => null,
			"jazoest" => null,
			"__spin_r" => null,
			"__spin_b" => null,
			"__spin_t" => null,
			"fb_api_caller_class" => "RelayModern",
			"fb_api_req_friendly_name" => "SearchCometResultsPaginatedResultsQuery",
			"variables" => [ // this is json
				"UFI2CommentsProvider_commentsKey" => "SearchCometResultsInitialResultsQuery",
				"allow_streaming" => false,
				"args" => [
					"callsite" => "comet:watch_search",
					"config" => [
						"exact_match" => false,
						"high_confidence_config" => null,
						"intercept_config" => null,
						"sts_disambiguation" => null,
						"watch_config" => null
					],
					"context" => [
						"bsid" => null,
						"tsid" => null
					],
					"experience" => [
						"encoded_server_defined_params" => null,
						"fbid" => null,
						"type" => "WATCH_TAB_GLOBAL"
					],
					"filters" => [],
					"text" => $search
				],
				"count" => 5,
				"cursor" =>
					isset($results["page_info"]["end_cursor"]) ?
					$results["page_info"]["end_cursor"] :
					null,
				"displayCommentsContextEnableComment" => false,
				"displayCommentsContextIsAdPreview" => false,
				"displayCommentsContextIsAggregatedShare" => false,
				"displayCommentsContextIsStorySet" => false,
				"displayCommentsFeedbackContext" => null,
				"feedLocation" => "SEARCH",
				"feedbackSource" => 23,
				"fetch_filters" => true,
				"focusCommentID" => null,
				"locale" => null,
				"privacySelectorRenderLocation" => "COMET_STREAM",
				"renderLocation" => "search_results_page",
				"scale" => 1,
				"stream_initial_count" => 0,
				"useDefaultActor" => false,
				"__relay_internal__pv__IsWorkUserrelayprovider" => false,
				"__relay_internal__pv__IsMergQAPollsrelayprovider" => false,
				"__relay_internal__pv__StoriesArmadilloReplyEnabledrelayprovider" => false,
				"__relay_internal__pv__StoriesRingrelayprovider" => false
			],
			"server_timestamps" => "true",
			"doc_id" => "6761275837251607" // is actually dynamic
		];
		
		// append filters to nextpage (the already-encoded JSON strings,
		// without their "<name>:0" keys)
		foreach($filter as $value){
			
			$nextpage["variables"]["args"]["filters"][] = $value;
		}
		
		// get bsid: look four levels below "require" for an entry that
		// carries variables.args.context.bsid
		$require =
			(isset($nextpagedata["require"]) && is_array($nextpagedata["require"])) ?
			$nextpagedata["require"] :
			[];
		
		foreach($require as $key){
			
			if(!is_array($key)){
				
				continue;
			}
			
			foreach($key as $innerkey){
				
				if(!is_array($innerkey)){
					
					continue;
				}
				
				foreach($innerkey as $inner_innerkey){
					
					if(!is_array($inner_innerkey)){
						
						continue;
					}
					
					foreach($inner_innerkey as $candidate){
						
						if(isset($candidate["variables"]["args"]["context"]["bsid"])){
							
							$nextpage["variables"]["args"]["context"]["bsid"] =
								$candidate["variables"]["args"]["context"]["bsid"];
						}
					}
				}
			}
		}
		
		// copy session tokens out of the "define" entries; the third
		// element of each entry holds the key/value payload
		$define =
			(isset($nextpagedata["define"]) && is_array($nextpagedata["define"])) ?
			$nextpagedata["define"] :
			[];
		
		foreach($define as $key){
			
			if(isset($key[2]["haste_session"])){
				
				$nextpage["__hs"] = $key[2]["haste_session"];
			}
			
			if(isset($key[2]["connectionClass"])){
				
				$nextpage["__ccg"] = $key[2]["connectionClass"];
			}
			
			if(isset($key[2]["hsi"])){
				
				$nextpage["__hsi"] = (string)$key[2]["hsi"];
			}
			
			if(!empty($key[2]["token"])){
				
				$nextpage["lsd"] = $key[2]["token"];
			}
			
			if(isset($key[2]["__spin_r"])){
				
				// __rev carries the same value as __spin_r
				$nextpage["__spin_r"] = (string)$key[2]["__spin_r"];
				$nextpage["__rev"] = $nextpage["__spin_r"];
			}
			
			if(isset($key[2]["__spin_b"])){
				
				$nextpage["__spin_b"] = $key[2]["__spin_b"];
			}
			
			if(isset($key[2]["__spin_t"])){
				
				$nextpage["__spin_t"] = (string)$key[2]["__spin_t"];
			}
		}
		
		preg_match(
			'/{"u":"\\\\\/ajax\\\\\/qm\\\\\/\?__a=([0-9]+)&__user=([0-9]+)&__comet_req=([0-9]+)&jazoest=([0-9]+)"/',
			$html,
			$ajaxparams
		);
		
		if(count($ajaxparams) !== 5){
			
			throw new Exception("Could not grep the AJAX parameters");
		}
		
		$nextpage["__a"] = $ajaxparams[1];
		$nextpage["__user"] = $ajaxparams[2];
		$nextpage["__comet_req"] = $ajaxparams[3];
		$nextpage["jazoest"] = $ajaxparams[4];
		
		// "variables" travels as a JSON string inside the form body
		$nextpage["variables"] = json_encode($nextpage["variables"]);
		
		$this->video_nextpage($nextpage);
	}
	
	return $this->out;
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of results through the GraphQL endpoint and prepare
 * the token for the page after it.
 *
 * Every returned edge is passed to parse_edge(). When the response
 * reports a further page, the request's cursor is advanced, "__dyn"
 * is replaced, and the updated request is stored through the
 * next-page helper; the resulting token is written to
 * $this->out["npt"].
 *
 * @param array $nextpage   Form fields for the request; "variables"
 *                          must be a JSON-encoded string.
 * @param bool  $getcursor  Unused; kept so the signature is unchanged.
 * @throws Exception        When the response is not valid JSON or the
 *                          HTTP request fails.
 */
private function video_nextpage($nextpage, $getcursor = false){
	
	$json =
		$this->get(
			"https://www.facebook.com/api/graphql/",
			$nextpage,
			self::post
		);
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode next page JSON");
	}
	
	// A response without a result list (for example an error payload)
	// is treated as an empty page instead of being iterated as null.
	$results =
		isset($json["data"]["serpResponse"]["results"]) ?
		$json["data"]["serpResponse"]["results"] :
		[];
	
	$edges =
		(isset($results["edges"]) && is_array($results["edges"])) ?
		$results["edges"] :
		[];
	
	foreach($edges as $result){
		
		$this->parse_edge($result);
	}
	
	if(
		isset($results["page_info"]["has_next_page"]) &&
		$results["page_info"]["has_next_page"] == 1
	){
		
		// "variables" is stored as a JSON string: decode it, move the
		// cursor forward, encode it again
		$nextpage["variables"] = json_decode($nextpage["variables"], true);
		
		$nextpage["variables"]["cursor"] =
			isset($results["page_info"]["end_cursor"]) ?
			$results["page_info"]["end_cursor"] :
			null;
		
		$nextpage["variables"] = json_encode($nextpage["variables"]);
		
		//change this for second call. after, it's static.
		// TODO: csr also updates to longer string
		$nextpage["__dyn"] = "7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q322aew9G2S0zU20xi3y4o0B-q1ew65xOfxO1Vw8G11xmfz81s8hwGwQw9m1YwBgao6C2O0B85W3S7Udo5qfK0EUjwGzE2swwwJK2W2K0zK5o4q0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w";
		
		// TODO: change this on third and 6th call
		//$nextpage["__s"] = $this->randomstring(6) . ":" . explode(":", $nextpage["__s"], 2)[1];
		
		$this->out["npt"] = $this->nextpage->store(json_encode($nextpage), "videos");
	}
}
|
||||||
|
|
||||||
|
/**
 * Convert one search result edge into an output entry.
 *
 * The entry is appended to $this->out["livestream"] when the broadcast
 * status is "live", and to $this->out["video"] otherwise.
 *
 * @param array $edge  A result edge; everything is read from
 *                     relay_rendering_strategy.view_model.
 */
private function parse_edge($edge){
	
	$append = "video";
	$edge = $edge["relay_rendering_strategy"]["view_model"];
	
	$meta = $edge["video_metadata_model"];
	
	// cast so a missing/null status is never handed to the string
	// functions below
	$status =
		isset($meta["video_broadcast_status"]) ?
		(string)$meta["video_broadcast_status"] :
		"";
	
	if(strtolower($status) == "live"){
		
		// handle livestream
		$duration = "_LIVE";
		$append = "livestream";
		$timetext = null;
		$views = (int)$meta["relative_time_string"];
		
	}elseif(stripos($status, "vod") !== false){
		
		// handle VOD format
		$timetext = null;
		$views = (int)$meta["relative_time_string"];
		
		$duration =
			$this->hms2int(
				$edge["video_thumbnail_model"]["video_duration_text"]
			);
		
	}else{
		
		// handle normal format: "<relative date> · <view count>"
		$timetext =
			explode(
				" · ",
				$meta["relative_time_string"],
				2
			);
		
		if(count($timetext) === 2){
			
			$views = $this->truncatedcount2int($timetext[1]);
		}else{
			
			$views = null;
		}
		
		$timetext = strtotime($timetext[0]);
		
		// strtotime() returns false for text it cannot parse; report
		// an unknown date as null like the other branches do
		if($timetext === false){
			
			$timetext = null;
		}
		
		$duration =
			$this->hms2int(
				$edge["video_thumbnail_model"]["video_duration_text"]
			);
	}
	
	if(isset($meta["video_owner_profile"]["uri_token"])){
		
		$profileurl =
			"https://www.facebook.com/watch/" .
			$meta["video_owner_profile"]["uri_token"];
	}else{
		
		$profileurl = $meta["video_owner_profile"]["url"];
	}
	
	// Newlines are flattened BEFORE truncating, for the description as
	// well as the title: limitstrlen() keeps only the text up to the
	// first line break, so the reverse order would drop everything
	// after the description's first line.
	$title =
		$this->limitstrlen(
			str_replace("\n", " ", $meta["title"]),
			100
		);
	
	$description =
		empty($meta["save_description"]) ?
		null :
		$this->limitstrlen(
			str_replace("\n", " ", $meta["save_description"])
		);
	
	$this->out[$append][] = [
		"title" => $title,
		"description" => $description,
		"author" => [
			"name" => $meta["video_owner_profile"]["name"],
			"url" => $profileurl,
			"avatar" => null
		],
		"date" => $timetext,
		"duration" => $duration,
		"views" => $views,
		"thumb" => [
			"url" => $edge["video_thumbnail_model"]["thumbnail_image"]["uri"],
			"ratio" => "16:9"
		],
		"url" =>
			"https://www.facebook.com/watch/?v=" .
			$edge["video_click_model"]["click_metadata_model"]["video_id"]
	];
}
|
||||||
|
|
||||||
|
/**
 * Build a random token.
 *
 * @param int|null $len  Number of characters to generate from the
 *                       lowercase/digit alphabet. Pass null for a
 *                       141-145 character token drawn from the larger
 *                       mixed-case alphabet that also contains "-".
 * @return string|null   The token; null when no character was
 *                       generated.
 */
private function randomstring($len){
	
	if($len === null){
		
		$alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-";
		$len = rand(141, 145);
	}else{
		
		$alphabet = "abcdefghijklmnopqrstuvwxyz123456789";
	}
	
	// highest valid index into the alphabet
	$last = strlen($alphabet) - 1;
	
	$token = null;
	
	for($left = $len; $left > 0; $left--){
		
		$token .= $alphabet[rand(0, $last)];
	}
	
	return $token;
}
|
||||||
|
|
||||||
|
/**
 * Shorten text to roughly $len characters without splitting a word.
 *
 * The text is word-wrapped at $len and only the first wrapped line is
 * kept, so anything after an existing line break is dropped too.
 *
 * @param string $text  Text to shorten.
 * @param int    $len   Wrap width (default 300).
 * @return string       The first line of the wrapped text.
 */
private function limitstrlen($text, $len = 300){
	
	$wrapped = wordwrap($text, $len, "\n");
	$break = strpos($wrapped, "\n");
	
	if($break === false){
		
		return $wrapped;
	}
	
	return substr($wrapped, 0, $break);
}
|
||||||
|
|
||||||
|
/**
 * Convert a "H:M:S", "M:S" or "S" duration string to seconds.
 *
 * The string is split on ":" into at most three fields; the last field
 * counts as seconds, the one before it as minutes and the first of
 * three as hours. Each field is cast to int.
 *
 * @param string $time  Duration text.
 * @return int          Duration in seconds.
 */
private function hms2int($time){
	
	$fields = explode(":", $time, 3);
	
	// walk from the rightmost field: seconds, then minutes, then hours
	$total = (int)array_pop($fields);
	$weight = 60;
	
	while(count($fields) !== 0){
		
		$total += (int)array_pop($fields) * $weight;
		$weight *= 60;
	}
	
	return $total;
}
|
||||||
|
|
||||||
|
/**
 * Convert an abbreviated count such as "1.2K views" to an integer.
 *
 * Only the first space-delimited token is read. A trailing k, m or b
 * (any case) scales the value by a thousand, a million or a billion.
 * Thousands separators are ignored and an empty input yields 0.
 *
 * @param string $number  Count text, optionally followed by a label.
 * @return int            The expanded count.
 */
private function truncatedcount2int($number){
	
	// keep only the numeric token ("1.2K views" -> "1.2K")
	$number = explode(" ", trim((string)$number), 2)[0];
	
	// drop thousands separators so "1,234" is not read as 1
	$number = str_replace(",", "", $number);
	
	if($number === ""){
		
		return 0;
	}
	
	$unit = strtolower($number[strlen($number) - 1]);
	
	switch($unit){
		
		case "k":
			$exponant = 1000;
			break;
		
		case "m":
			$exponant = 1000000;
			break;
		
		case "b":
			$exponant = 1000000000;
			break;
		
		default:
			$exponant = 1;
			break;
	}
	
	// The float cast reads the leading number and ignores the unit
	// suffix; rounding removes the binary-fraction error left by the
	// multiplication and handles any number of decimal digits.
	return (int)round((float)$number * $exponant);
}
|
||||||
|
}
|
Loading…
Reference in a new issue