google web, videos and news, various other fixes

This commit is contained in:
lolcat 2023-11-27 01:01:56 -05:00
parent 9fd993b47b
commit 2519666e1c
22 changed files with 2913 additions and 1118 deletions

View file

@ -23,6 +23,7 @@ https://4get.ca
- DuckDuckGo
- Brave
- Yandex
- Google
- Mojeek
- Marginalia
- wiby
@ -41,10 +42,12 @@ https://4get.ca
- DuckDuckGo
- Brave
- Yandex
- Google
4. News
- DuckDuckGo
- Brave
- Google
- Mojeek
5. Music
@ -61,7 +64,7 @@ https://4get.ca
- YouTube
- SoundCloud
More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list.
More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping tab, a files tab and more music scrapers are also on my todo list.
# Installation
This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK.
@ -190,6 +193,41 @@ services:
Replace relevant values and start with `docker-compose up -d`
## Install on Caddy
1. Install dependencies:
`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
2. Clone this repository where you want to host this from:
`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
3. Set permission on the `icons` directory inside `4get`
`cd /var/www/4get/ && sudo chmod 777 -R icons/`
4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile`
```sh
4get.konakona.moe {
root * /var/www/4get
file_server
encode gzip
php_fastcgi unix//var/run/php/php8.2-fpm.sock {
index index.php
}
redir /{path}.php{query} 301
try_files {path} {path}.php
}
```
Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
5. Restart Caddy
`sudo systemctl restart caddy`
# Encryption setup
I'm schizoid (as you should be) so I'm gonna set up 4096-bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour.

View file

@ -5,8 +5,8 @@ header("Access-Control-Allow-Origin: *");
include "data/config.php";
$bot_requests = apcu_fetch("captcha");
$real_requests = apcu_fetch("real_requests");
$bot_requests = apcu_fetch("captcha_gen");
echo json_encode(
[

View file

@ -68,8 +68,8 @@
+ Get the next page of results
All API responses come with an array index named "nextpage". To get
the next page of results, you must make another API call with &npt.
All API responses come with an array index named "npt". To get the
next page of results, you must make another API call with &npt.
Example ::

View file

@ -39,10 +39,12 @@ if(
}
try{
echo json_encode(
$scraper->web($get),
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
echo
json_encode(
$scraper->web($get),
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
}catch(Exception $e){

View file

@ -5,7 +5,7 @@ class config{
// any parameters.
// 4get version. Please keep this updated
const VERSION = 5;
const VERSION = 6;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
@ -56,14 +56,22 @@ class config{
const INSTANCES = [
"https://4get.ca",
"https://4get.zzls.xyz",
"https://4getus.zzls.xyz",
"https://4get.silly.computer",
"https://4g.opnxng.com",
"https://4get.konakona.moe"
"https://4get.konakona.moe",
"https://4get.lvkaszus.pl",
"https://4g.ggtyler.dev",
"https://4get.perennialte.ch",
"https://4get.sihj.net",
"https://4get.hbubli.cc",
"https://4get.plunked.party",
"https://4get.seitan-ayoub.lol"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0";
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP

View file

@ -7,6 +7,7 @@ class captcha{
// check if we want captcha
if(config::BOT_PROTECTION !== 1){
apcu_inc("real_requests");
if($output === true){
$frontend->loadheader(
$get,
@ -45,6 +46,8 @@ class captcha{
}else{
// the cookie is OK! dont die() and give results
apcu_inc("real_requests");
if($output === true){
$frontend->loadheader(
$get,
@ -175,6 +178,8 @@ class captcha{
apcu_inc($key, 1, $stupid, 86400);
apcu_inc("real_requests");
setcookie(
"pass",
$key,
@ -197,7 +202,7 @@ class captcha{
$error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>";
}
}
// get the positions for the answers
// will return between 3 and 6 answer positions
$range = range(0, 15);

View file

@ -25,7 +25,7 @@ class frontend{
if($theme != "Dark"){
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">';
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . rawurlencode($theme) . '.css?v' . config::VERSION . '">';
}else{
$replacements["style"] = "";
@ -84,6 +84,8 @@ class frontend{
){
// bot detected !!
apcu_inc("captcha_gen");
$this->drawerror(
"Tshh, blocked!",
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
@ -889,7 +891,7 @@ class frontend{
"ddg" => "DuckDuckGo",
"brave" => "Brave",
"yandex" => "Yandex",
//"google" => "Google",
"google" => "Google",
"mojeek" => "Mojeek",
"marginalia" => "Marginalia",
"wiby" => "wiby"
@ -921,8 +923,8 @@ class frontend{
//"fb" => "Facebook videos",
"ddg" => "DuckDuckGo",
"brave" => "Brave",
"yandex" => "Yandex"
//"google" => "Google"
"yandex" => "Yandex",
"google" => "Google"
]
];
break;
@ -933,7 +935,7 @@ class frontend{
"option" => [
"ddg" => "DuckDuckGo",
"brave" => "Brave",
//"google" => "Google",
"google" => "Google",
"mojeek" => "Mojeek"
]
];

View file

@ -15,7 +15,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){
throw new Exception("(load) Supplied array doesn't contain a innerHTML index");
throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
}
$html = $html["innerHTML"];
}
@ -35,6 +35,11 @@ class fuckhtml{
$this->strlen = strlen($this->html);
}
// Return the raw HTML string currently stored on this instance
// ($this->html), unmodified.
public function getloadedhtml(){
return $this->html;
}
public function getElementsByTagName(string $tagname){
$out = [];
@ -46,7 +51,7 @@ class fuckhtml{
if($tagname == "*"){
$tagname = '[^\/<>\s]+';
$tagname = '[A-Za-z0-9._-]+';
}else{
$tagname = preg_quote(strtolower($tagname));
@ -126,7 +131,7 @@ class fuckhtml{
}
);
// computer the indent level for each element
// compute the indent level for each element
$level = [];
$count = count($out);
@ -314,7 +319,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){
throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index");
throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
}
$html = $html["innerHTML"];
}
@ -441,4 +446,27 @@ class fuckhtml{
return json_decode($json_out, true);
}
// Decode JavaScript-style escape sequences found in $string.
//
// Handles two kinds of escapes and leaves everything else untouched:
//   \uXXXX  -> decoded through json_decode(). A high surrogate that is
//              immediately followed by a low surrogate is matched as ONE
//              unit, because json_decode() rejects each half on its own
//              and the character (emoji etc.) would otherwise be lost.
//   \xXX    -> the single byte is interpreted as windows-1252 and
//              converted to UTF-8.
//
// A lone (unpaired) surrogate cannot be represented in UTF-8, so it is
// replaced by an empty string.
//
// @param string $string Text containing \uXXXX and/or \xXX escapes
// @return string|null The decoded text (null if preg_replace_callback fails)
public function parseJsString($string){
	
	return
		preg_replace_callback(
			// surrogate pair first, then any single \uXXXX, then \xXX
			'/\\\u[Dd][89ABab][A-Fa-f0-9]{2}\\\u[Dd][C-Fc-f][A-Fa-f0-9]{2}|\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}/',
			function($match){
				
				if($match[0][1] == "u"){
					
					$decoded = json_decode('"' . $match[0] . '"');
					
					// json_decode() yields null for unpaired surrogates
					return is_string($decoded) ? $decoded : "";
				}
				
				return mb_convert_encoding(
					stripcslashes($match[0]),
					"utf-8",
					"windows-1252"
				);
			},
			$string
		);
}
}

View file

@ -24,5 +24,5 @@
User-agent: *
Disallow:
host: 4get.ca
sitemap: https://4get.ca/sitemap.xml
Host: 4get.ca
Sitemap: https://4get.ca/sitemap

View file

@ -857,7 +857,9 @@ class brave{
// parse ratings
if(
isset($info["ratings"]) &&
$info["ratings"] != "void 0"
$info["ratings"] != "void 0" &&
is_array($info["ratings"]) &&
count($info["ratings"]) !== 0
){
$description[] = [
@ -1183,7 +1185,7 @@ class brave{
"title" => $news["title"],
"author" => null,
"description" => $news["description"],
"date" => !isset($news["age"]) || $news["age"] == "void 0" ? null : strtotime($news["age"]),
"date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]),
"thumb" => $thumb,
"url" => $news["url"]
];

View file

@ -545,8 +545,6 @@ class ddg{
public function web($get){
$proxy = null;
if($get["npt"]){
[$jsgrep, $proxy] = $this->backend->get($get["npt"], "web");

File diff suppressed because it is too large Load diff

View file

@ -608,7 +608,7 @@ class mojeek{
$this->fuckhtml
->getTextContent(
$this->fuckhtml
->getElementsByClassName("i", "p")[1]
->getElementsByClassName("i", "p")[0]
)
);

View file

@ -229,7 +229,7 @@ class sc{
if($json === null){
throw new Exception("Failed to decode JSON");
throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?");
}
$out = [

View file

@ -117,10 +117,10 @@ $settings = [
"value" => "yandex",
"text" => "Yandex"
],
/*[
[
"value" => "google",
"text" => "Google"
],*/
],
[
"value" => "mojeek",
"text" => "Mojeek"
@ -192,11 +192,11 @@ $settings = [
[
"value" => "yandex",
"text" => "Yandex"
]/*,
],
[
"value" => "google",
"text" => "Google"
]*/
]
]
],
[
@ -211,10 +211,10 @@ $settings = [
"value" => "brave",
"text" => "Brave"
],
/*[
[
"value" => "google",
"text" => "Google"
],*/
],
[
"value" => "mojeek",
"text" => "Mojeek"
@ -434,20 +434,33 @@ $left .=
'</div>' .
'<div class="settings-submit">' .
'<input type="submit" value="Update settings!">' .
'<a href="../">&lt; Return to front page</a>' .
'<a href="../">&lt; Go back</a>' .
'</div>' .
'</form>';
if(count($_GET) === 0){
// Serialize the visitor's current cookies into a query string for the
// "preference link". http_build_query() with PHP_QUERY_RFC3986
// percent-encodes keys and values the same way rawurlencode() does for
// plain string cookies, and it also copes with array-valued cookies
// (a client can send e.g. "a[b]=1"), which would make rawurlencode()
// throw a TypeError.
$code = http_build_query($_COOKIE, "", "&", PHP_QUERY_RFC3986);

// only prepend "?" when there is at least one cookie to carry over
if($code != ""){
	
	$code = "?" . $code;
}
echo
$frontend->load(
"search.html",
[
"class" => "",
"right-left" =>
'<div class="infobox"><h2>Preference link</h2>Follow this link to auto-apply all cookies. Useful if your browser clears out cookies after a browsing session. Following this link will redirect you to the front page, unless no settings are set.<br><br>' .
'<a href="settings' . rtrim("?" . str_replace("; ", "&", $code), "?") . '">Bookmark me!</a>' .
'<div class="infobox"><h2>Preference link</h2>Following this link will re-apply all cookies configured here and will redirect you to the front page. Useful if your browser clears out cookies after a browsing session.<br><br>' .
'<a href="settings' . $code . '">Bookmark me!</a>' .
'</div>',
"right-right" => "",
"left" => $left

BIN
static/icon/call.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

BIN
static/icon/directions.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View file

@ -160,6 +160,16 @@ function number_format(int){
return new Intl.NumberFormat().format(int);
}
// Wrap window.fetch so that every Response it resolves with carries a
// `ping` property: the number of milliseconds between the call and the
// moment the fetch promise resolved.
window.fetch = (function(native_fetch){
	
	return function(fn, t){
		
		const started_at = Date.now();
		
		// forward the caller's `this` and arguments untouched
		const pending = native_fetch.apply(this, arguments);
		
		return pending.then(
			function(response){
				
				response.ping = Date.now() - started_at;
				return response;
			}
		);
	};
})(window.fetch);
// parse initial server list
fetch_server(window.location.origin);
@ -187,25 +197,24 @@ async function fetch_server(server){
list.push(server);
var data = null;
var ping = new Date().getTime();
try{
data = await fetch(
var payload = await fetch(
server + "/ami4get"
);
if(data.status !== 200){
if(payload.status !== 200){
// endpoint is not available
errors++;
div_failedreqs.textContent = number_format(errors);
console.warn(server + ": Invalid HTTP code " + data.status);
console.warn(server + ": Invalid HTTP code " + payload.status);
return;
}
data = await data.json();
data.server.ping = new Date().getTime() - ping;
data = await payload.json();
data.server.ping = payload.ping;
}catch(error){

View file

@ -499,6 +499,7 @@ h3,h4,h5,h6{
text-align:center;
display:block;
text-align:left;
white-space:nowrap;
}
.favicon-dropdown img{
@ -1247,6 +1248,11 @@ table tr a:last-child{
padding-left:20px;
}
.instances .go-back{
margin-top:17px;
display:inline-block;
}
/*
Responsive image

View file

@ -27,6 +27,7 @@
</tbody>
</table>
</noscript>
<a href="../" class="go-back">&lt; Go back</a>
<div id="popup-bg"></div>
<div class="popup-wrapper">
<div class="popup"></div>

14
web.php
View file

@ -146,9 +146,17 @@ if(count($results["image"]) !== 0){
$right["image"] .=
'<a class="image" href="' . htmlspecialchars($image["url"]) . '" rel="noreferrer nofollow" title="' . htmlspecialchars($image["title"]) . '" data-json="' . htmlspecialchars(json_encode($image["source"])) . '" tabindex="-1">' .
'<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">' .
'<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>' .
'</a>';
'<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">';
if(
$image["source"][0]["width"] !== null &&
$image["source"][0]["height"] !== null
){
$right["image"] .= '<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>';
}
$right["image"] .= '</a>';
}
$right["image"] .=