diff --git a/api/v1/ac.php b/api/v1/ac.php
index 107f5ec..ce9b3f2 100644
--- a/api/v1/ac.php
+++ b/api/v1/ac.php
@@ -19,7 +19,8 @@ class autocomplete{
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "",
- "startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english"
+ "startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
+ "kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}"
];
/*
diff --git a/data/config.php b/data/config.php
index cba8b66..0d44c19 100644
--- a/data/config.php
+++ b/data/config.php
@@ -63,6 +63,14 @@ class config{
//"via"
];
+ // Block SSL ciphers used by CLI tools used for botting
+ // Basically a primitive version of Cloudflare's browser integrity check
+ // ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
+ // https://git.lolcat.ca/lolcat/4get/docs/apache2.md
+ const DISALLOWED_SSL = [
+ // "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
+ ];
+
// Maximal number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0
@@ -111,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
- const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0";
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
diff --git a/docs/apache2-example.md b/docs/apache2-example.md
new file mode 100644
index 0000000..09f5c1d
--- /dev/null
+++ b/docs/apache2-example.md
@@ -0,0 +1,195 @@
+# Sample Apache2 configuration
+This is the apache2 configuration file used on the 4get.ca official instance, in hopes that it's useful to you!
+
+Looking for the apache2 guide? [Go here](apache2.md).
+
+```xml
+<VirtualHost *:443>
+ ServerName www.4get.ca
+
+ SSLEngine On
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+ RedirectMatch 301 ^(.*)$ https://4get.ca$1
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName 4get.ca
+
+ ServerAdmin will@lolcat.ca
+ DocumentRoot /var/www/4get
+
+ SSLEngine On
+ SSLOptions +StdEnvVars
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+ <Directory /var/www/4get>
+ Options -MultiViews
+ AllowOverride All
+ Require all granted
+
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+ </Directory>
+
+ # deny access to private resources
+ <Directory /var/www/4get/data/>
+ Order Deny,allow
+ Deny from all
+ </Directory>
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName www.lolcat.ca
+
+ SSLEngine On
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+ RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName lolcat.ca
+
+ ServerAdmin will@lolcat.ca
+ DocumentRoot /var/www/lolcat
+
+ SSLEngine On
+ SSLOptions +StdEnvVars
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+ <Directory /var/www/lolcat>
+ Options -MultiViews
+ AllowOverride All
+ Require all granted
+
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+ </Directory>
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName www.nyym.co
+
+ SSLEngine On
+ SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
+
+ RedirectMatch 301 ^(.*)$ https://nyym.co$1
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName nyym.co
+
+ ServerAdmin will@lolcat.ca
+ DocumentRoot /var/www/nyym
+
+ SSLEngine On
+ SSLOptions +StdEnvVars
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/nyym.co/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/nyym.co/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/nyym.co/chain.pem
+
+ <Directory /var/www/nyym>
+ Options -MultiViews
+ AllowOverride All
+ Require all granted
+
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+ </Directory>
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName git.lolcat.ca
+
+ SSLEngine On
+ SSLOptions +StdEnvVars
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+ ProxyPreserveHost On
+ ProxyRequests off
+ AllowEncodedSlashes NoDecode
+ ProxyPass / http://localhost:3000/ nocanon
+</VirtualHost>
+
+<VirtualHost *:443>
+ ServerName live.lolcat.ca
+
+ ServerAdmin will@lolcat.ca
+ DocumentRoot /var/www/live
+
+ SSLEngine On
+ SSLOptions +StdEnvVars
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+</VirtualHost>
+```
diff --git a/docs/apache2.md b/docs/apache2.md
index e746a7e..1e79327 100644
--- a/docs/apache2.md
+++ b/docs/apache2.md
@@ -74,7 +74,7 @@ Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, r
DocumentRoot /var/www/4get
- Options +MultiViews
+ Options -MultiViews
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
@@ -92,47 +92,56 @@ To make the above snippet work, please refer to our
- SSLOptions +StdEnvVars
-
-
- SSLOptions +StdEnvVars
-
-
+
AddOutputFilterByType DEFLATE application/json
AddOutputFilterByType DEFLATE application/javascript
AddOutputFilterByType DEFLATE application/x-javascript
AddOutputFilterByType DEFLATE text/html
AddOutputFilterByType DEFLATE text/plain
AddOutputFilterByType DEFLATE text/css
-
+
SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
-
-```
-
-This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private.
-```xml
-
- ServerName 4get.ca
-
- DocumentRoot /var/www/4get
-
- Options +MultiViews
- RewriteEngine On
- RewriteCond %{REQUEST_FILENAME} !-d
- RewriteCond %{REQUEST_FILENAME} !-f
- RewriteRule ^([^\.]+)$ $1.php [NC,L]
+ SSLCertificateChainFile /etc/letsencrypt/live/4get.ca/chain.pem
+
+
+ Options -MultiViews
+ AllowOverride All
+ Require all granted
+
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+
# deny access to private resources
@@ -142,28 +151,7 @@ This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that
```
-Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`:
-```xml
-
- ServerName lolcat.ca
-
- DocumentRoot /var/www/lolcat
-
- Options +MultiViews
- RewriteEngine On
- RewriteCond %{REQUEST_FILENAME} !-d
- RewriteCond %{REQUEST_FILENAME} !-f
- RewriteRule ^([^\.]+)$ $1.php [NC,L]
-
-```
-
-... Alongside with it's redirect rules.
-```xml
-
- ServerName www.lolcat.ca
- RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
-
-```
+By default, the first rule dictates where traffic should be redirected to in case the client specifies an unknown domain name. Don't forget your webserver's other rules! For a complete real-world example, please check out [the config file I use on 4get.ca](apache2-example.md).
## security.conf
If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit:
diff --git a/lib/frontend.php b/lib/frontend.php
index ef55f4d..71ed6d7 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -89,6 +89,7 @@ class frontend{
$user_agent = "";
$bad_header = false;
+ // block bots that present X-Forwarded-For, Via, etc
foreach($headers_raw as $headerkey => $headervalue){
$headerkey = strtolower($headerkey);
@@ -106,12 +107,27 @@ class frontend{
}
}
+ // SSL check: flag clients whose negotiated TLS cipher is listed in config::DISALLOWED_SSL
+ $bad_ssl = false;
if(
+ isset($_SERVER["HTTPS"]) &&
+ $_SERVER["HTTPS"] == "on" &&
+ isset($_SERVER["SSL_CIPHER"]) &&
+ in_array($_SERVER["SSL_CIPHER"], config::DISALLOWED_SSL, true)
+ ){
+
+ $bad_ssl = true;
+ }
+
+ if(
+ $bad_header === true ||
+ $bad_ssl === true ||
+ $user_agent == "" ||
+ // user agent check
preg_match(
config::HEADER_REGEX,
$user_agent
- ) ||
- $bad_header === true
+ )
){
// bot detected !!
@@ -1306,7 +1322,7 @@ class frontend{
return htmlspecialchars($image);
}
- return "/proxy?i=" . urlencode($image) . "&s=" . $format;
+ return "/proxy?i=" . urlencode($image) . "&s=" . $format;
}
public function htmlnextpage($gets, $npt, $page){
diff --git a/scraper/mwmbl.php b/scraper/mwmbl.php
index f2f8b70..631b90c 100644
--- a/scraper/mwmbl.php
+++ b/scraper/mwmbl.php
@@ -52,7 +52,7 @@ class mwmbl{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
- curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); // @todo reset
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$this->backend->assign_proxy($curlproc, $proxy);
diff --git a/scraper/pinterest.php b/scraper/pinterest.php
index f3c4439..3787f77 100644
--- a/scraper/pinterest.php
+++ b/scraper/pinterest.php
@@ -13,7 +13,7 @@ class pinterest{
return [];
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@@ -45,7 +45,7 @@ class pinterest{
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
- $this->proxy->assign_proxy($curlproc);
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -60,45 +60,63 @@ class pinterest{
public function image($get){
- $search = $get["s"];
-
- $out = [
- "status" => "ok",
- "npt" => null,
- "image" => []
- ];
-
- $filter = [
- "source_url" => "/search/pins/?q=" . urlencode($search),
- "rs" => "typed",
- "data" =>
- json_encode(
- [
- "options" => [
- "article" => null,
- "applied_filters" => null,
- "appliedProductFilters" => "---",
- "auto_correction_disabled" => false,
- "corpus" => null,
- "customized_rerank_type" => null,
- "filters" => null,
- "query" => $search,
- "query_pin_sigs" => null,
- "redux_normalize_feed" => true,
- "rs" => "typed",
- "scope" => "pins", // pins, boards, videos,
- "source_id" => null
- ],
- "context" => []
- ]
- ),
- "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
- ];
+ if($get["npt"]){
+
+ // @TODO
+ // post data for next page
+ $data = [
+ "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
+ "data" =>
+ json_encode(
+ [
+ // {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sT
lZGcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}}
+ ]
+ )
+ ];
+
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $filter = [
+ "source_url" => "/search/pins/?q=" . urlencode($search),
+ "rs" => "typed",
+ "data" =>
+ json_encode(
+ [
+ "options" => [
+ "article" => null,
+ "applied_filters" => null,
+ "appliedProductFilters" => "---",
+ "auto_correction_disabled" => false,
+ "corpus" => null,
+ "customized_rerank_type" => null,
+ "filters" => null,
+ "query" => $search,
+ "query_pin_sigs" => null,
+ "redux_normalize_feed" => true,
+ "rs" => "typed",
+ "scope" => "pins", // pins, boards, videos,
+ "source_id" => null
+ ],
+ "context" => []
+ ]
+ ),
+ "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
+ ];
+
+ $proxy = $this->backend->get_ip();
+ }
try{
$json =
json_decode(
$this->get(
+ $proxy,
"https://www.pinterest.ca/resource/BaseSearchResource/get/",
$filter
),
@@ -115,7 +133,11 @@ class pinterest{
throw new Exception("Failed to decode JSON");
}
- //print_r($json);
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "image" => []
+ ];
foreach(
$json
@@ -189,7 +211,6 @@ class pinterest{
break;
case "board":
-
if(isset($item["cover_pin"]["image_url"])){
$image = [
diff --git a/settings.php b/settings.php
index 6f99e93..046e7c7 100644
--- a/settings.php
+++ b/settings.php
@@ -83,6 +83,10 @@ $settings = [
"value" => "startpage",
"text" => "Startpage"
],
+ [
+ "value" => "kagi",
+ "text" => "Kagi"
+ ],
[
"value" => "qwant",
"text" => "Qwant"