diff --git a/.gitignore b/.gitignore index efe24b8..9ad46f8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,5 @@ release yarn-error.log yarn.lock .history -.vscode .umirc.local.js -.now \ No newline at end of file +.now diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..1f27ee5 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,31 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name":"Python: Current File", + "type":"python", + "request":"launch", + "program":"${file}", + "args": ["web-result.txt"], + "console":"integratedTerminal" + }, + { + "type": "node", + "request": "launch", + "name": "Launch Program", + "skipFiles": [ + "/**" + ], + "env": { + "PORT":"8011", + "localFlag": "true", + "runtimeArgs": ["--tls-min-v1.0"], + "args": [] + }, + "program": "${workspaceFolder}/index.js" + } + ] +} diff --git a/Proxy.js b/Proxy.js index 0bfe81d..f67d1a3 100644 --- a/Proxy.js +++ b/Proxy.js @@ -1,6 +1,7 @@ var express = require('express'); var proxy = require('http-proxy-middleware'); const zlib = require("zlib") +const fs = require("fs") const parse = require('url-parse') const cookiejar = require('cookiejar') const iconv = require('iconv-lite') @@ -28,6 +29,17 @@ var enableCors = function(req, res) { } }; +var saveRecord = ({stream, fwdStr, req, host, pktLen}) => { + if (fwdStr) { + let dateStr = new Date().toLocaleString() + let ips = fwdStr.split(',') + if (ips.length > 0) { + let sourceIP = ips[0] + stream.write(`${dateStr},${sourceIP},${host},${pktLen},${req.url}\n`) + } + } +} + var redirect2HomePage = function({res, httpprefix, serverName,} ) { try { res.setHeader('location',`${httpprefix}://${serverName}`) @@ -46,14 +58,12 @@ let getHostFromReq = (req) => { //return target let httpType = 'https' if (req.url.startsWith(https_prefix)) { host = req.url.slice(https_prefix.length, req.url.length) - if (host.indexOf('/') !== -1) { - host = host.slice(0, host.indexOf('/')) - } + let hosts = host.match(/[-a-z0-9A-Z]+\.[-a-z0-9A-Z.]+/g); + host = hosts.length>0?hosts[0]:'' } else if (req.url.startsWith(http_prefix)) { host = req.url.slice(http_prefix.length, req.url.length) - if (host.indexOf('/') !== -1) { - host = host.slice(0, host.indexOf('/')) - } + let hosts = host.match(/[-a-z0-9A-Z]+\.[-a-z0-9A-Z.]+/g); + host = hosts.length>0?hosts[0]:'' httpType = 'http' } else if (req.headers['referer'] && req.headers['referer'].indexOf('https/') !== -1) { let start = req.headers['referer'].indexOf('https/') + 6 @@ -90,8 +100,8 @@ let getHostFromReq = (req) => { //return target let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomainRewrite, locationReplaceMap302, regReplaceMap, siteSpecificReplace, pathReplace}) => { + let stream = fs.createWriteStream("web-records.csv", {flags:'a'}) let handleRespond = ({req, res, body, gbFlag}) => { - // logSave("res from proxied server:", body); let myRe let {host, httpType} = getHostFromReq(req) let location = res.getHeaders()['location'] @@ -113,15 +123,14 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai } logSave(`##### host:${host}`) if (host) { - body = pathReplace({host, httpType, body}) + body = pathReplace({host, httpType, body}) //13ms } - // remove duplicate /https/siteproxylocal.now.sh:443 - myRe = new RegExp(`/${httpprefix}/${serverName}.*?/`, 'g') // match group - body = body.replace(myRe, '/') - + logSave(`2`) + logSave(`3`) myRe = new RegExp(`/${httpType}/${host}/${httpType}/${host}/`, 'g') // match group body = body.replace(myRe, `/${httpType}/${host}/`) + logSave(`4`) //1ms // put siteSpecificReplace at end Object.keys(siteSpecificReplace).forEach( (site) => { if (!req.url) { @@ -134,8 +143,9 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai body = body.replace(myRe, siteSpecificReplace[site][key]) }) } - }) + }) //17ms + logSave(`5`) if (gbFlag) { body = iconv.encode(body, 'gbk') } @@ -144,14 +154,15 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai // need to manually redirect it for youtube workaround. console.log(`============== redirect googlevideo.com`) try { - res.setHeader('location', body) + res.setHeader('location', body) //0ms } catch(e) { logSave(`error: ${e}`) return } res.statusCode = '302' } - body = zlib.gzipSync(body) + logSave(`6`) + body = zlib.gzipSync(body) //19ms try { res.setHeader('content-encoding', 'gzip'); logSave(`handleRespond: res.statusCode:${res.statusCode}, res.headers:${JSON.stringify(res.getHeaders())}`) @@ -167,7 +178,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai } // only support https for now. const router = (req) => { //return target - let myRe = new RegExp(`/${httpprefix}/${serverName}.*?/`, 'g') // match group + let myRe = new RegExp(`/http[s]?/${serverName}.*?/`, 'g') // match group req.url = req.url.replace(myRe, '/') let {host, httpType} = getHostFromReq(req) @@ -189,6 +200,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai */ // hostRewrite: true, // autoRewrite: true, + // proxyTimeout: 15000, // 10 seconds protocolRewrite: true, // followRedirects: true, cookieDomainRewrite, @@ -211,11 +223,28 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai onProxyRes: (proxyRes, req, res) => { let {host, httpType} = getHostFromReq(req) logSave(`proxyRes.status:${proxyRes.statusCode} proxyRes.headers:${JSON.stringify(proxyRes.headers)}`) - let body = Buffer.from(''); + let bodyList = [] + let bodyLength = 0 + let endFlag = false proxyRes.on('data', function(data) { - body = Buffer.concat([body, data]); + // body = Buffer.concat([body, data]); + if (endFlag === true) { + return // don't have to push it to bodyList + } + bodyLength += data.length + bodyList.push(data) + if (res.getHeader('content-type') && res.getHeader('content-type').indexOf('video') !== -1) { + let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for'] + if ((host.indexOf('cdn') !== -1 && bodyLength >= 105000000) || + (host.indexOf('cdn') === -1 && bodyLength >= 2500000)) { + } + } }) proxyRes.on('end', function() { + if (endFlag === true) { + return + } + let body = Buffer.concat(bodyList) let gbFlag = false if (proxyRes.headers["content-encoding"] === 'gzip' || proxyRes.headers["content-encoding"] === 'br') { @@ -245,29 +274,42 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai logSave(`utf-8 text...`) let originBody = gunzipped body = gunzipped.toString('utf-8'); - if (body.indexOf('="text/html; charset=gb') !== -1 || - body.indexOf(' charset="gb') !== -1 || - body.indexOf('=\'text/html; charset=gb') !== -1) { + let searchBody = body.slice(0, 1000) + if (searchBody.indexOf('="text/html; charset=gb') !== -1 || + searchBody.indexOf(' charset="gb') !== -1 || + searchBody.indexOf('=\'text/html; charset=gb') !== -1) { logSave(`gb2312 found...`) body = iconv.decode(originBody, 'gbk') gbFlag = true } + let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for'] || '' + if (proxyRes.statusCode === 200 && proxyRes.headers["content-type"] && + proxyRes.headers["content-type"].indexOf('text/html') !== -1) { + saveRecord({stream, fwdStr, req, host, pktLen:body.length}) + } + if (proxyRes.statusCode === 200 && req.url.indexOf('/sw.js') !== -1) { + // fetching sw.js + res.setHeader('service-worker-allowed','/') + } handleRespond({req, res, body, gbFlag}) } else { // console.log(`2========>${logGet()}`) - let key = "content-encoding" - if(key in proxyRes.headers) { - res.setHeader(key, proxyRes.headers[key]); + try { + let key = "content-encoding" + if(key in proxyRes.headers) { + res.setHeader(key, proxyRes.headers[key]); + } + logSave(`2: res.headers:${JSON.stringify(res.getHeaders())}`) + if (req.headers['debugflag']==='true') { + res.removeHeader('content-encoding') + res.setHeader('content-type','text/plain') + body=logGet() + } + res.end(body) + } catch(e) { + console.log(`error:${e}`) } - logSave(`2: res.headers:${JSON.stringify(res.getHeaders())}`) - if (req.headers['debugflag']==='true') { - res.removeHeader('content-encoding') - res.setHeader('content-type','text/plain') - body=logGet() - } - res.end(body) } - } else if (proxyRes.statusCode === 301 || proxyRes.statusCode === 302 || proxyRes.statusCode === 307 || proxyRes.statusCode === 308 || (proxyRes.headers["content-type"] && (proxyRes.headers["content-type"].indexOf('text/') !== -1 || @@ -291,16 +333,28 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai res.setHeader('content-type','text/plain') body=logGet() } + if (res.getHeader('content-type') && res.getHeader('content-type').indexOf('video') !== -1) { + let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for'] + console.log(`route:${fwdStr}, length:${bodyLength}, ${host}`) + } res.end(body) } }) const setCookieHeaders = proxyRes.headers['set-cookie'] || [] + let datestr = '' + if (setCookieHeaders.length > 0) { + let date = new Date + date.setDate(date.getDate() + 1) // 一天之后过期 + datestr = date.toUTCString() + } const modifiedSetCookieHeaders = setCookieHeaders .map(str => new cookiejar.Cookie(str)) .map(cookie => { logSave(`cookie:${JSON.stringify(cookie)}`) if (cookie.path && cookie.path[0] === '/') { cookie.domain = `${serverName}` + cookie.expiration_date = datestr + cookie.path = `/${httpType}/${host}${cookie.path}` } cookie.secure = false return cookie @@ -352,7 +406,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai } } let timestr = new Date().toISOString() - console.log(`[${timestr}] route:${fwdStr}, httpType:${httpType}, host:${host}`) + console.log(`route:${fwdStr}, httpType:${httpType}, host:${host}`) if (host.indexOf(serverName) !== -1 || // we cannot request resource from proxy itself host == '' || host.indexOf('.') === -1 || (fwdStr && fwdStr.split(',').length > 3)) { // too many forwardings res.status(404).send("{}") @@ -368,9 +422,16 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai let newpath = req.url.replace(`/${httpType}/${host}`, '') || '/' logSave(`httpType:${httpType}, host:${host}, req.url:${req.url}, req.headers:${JSON.stringify(req.headers)}`) Object.keys(req.headers).forEach(function (key) { - if (key.indexOf('x-') === 0) { + // remove nginx/cloudflare/pornhub related headers + if ((host.indexOf('twitter.com') === -1 && key.indexOf('x-') === 0) || + key.indexOf('sec-fetch') === 0 || + key.indexOf('cf-') === 0) { logSave(`remove key=${key},`) proxyReq.removeHeader(key) + if (key === 'sec-fetch-mode') { + proxyReq.setHeader('sec-fetch-mode', 'cors') + } + return } logSave(`set key=${key},`) proxyReq.setHeader(key, req.headers[key]) diff --git a/config.js b/config.js index e9f61ed..227e984 100644 --- a/config.js +++ b/config.js @@ -9,7 +9,7 @@ const {CookieAccessInfo, CookieJar, Cookie} = cookiejar let config = { httpprefix: 'https', port: 443, - serverName: 'siteproxy.netptop.com', + serverName: 'proxy.netptop.com', } let blockedSites = ['merlinblog.xyz'] @@ -64,17 +64,12 @@ const locationReplaceMap302 = ({location, serverName, httpprefix, host, httpType } const regReplaceMap = { - '"//([-a-z0-9A-Z.]+)': `"//${serverName}:${port}/https/$1`, // default use https - '\'//([-a-z0-9A-Z.]+)': `'//${serverName}:${port}/https/$1`,// default use https + '(["\'])//([-a-z0-9A-Z.]+)': `$1//${serverName}:${port}/https/$2`, // default use https 'url[(]//([-a-z0-9A-Z.]+)': `url(//${serverName}:${port}/https/$1`,// default use https - 'https:(././)([-a-z0-9A-Z.]+)': `${httpprefix}:$1${serverName}:${port}\\/https\\/$2`, - 'http:(././)([-a-z0-9A-Z.]+)': `${httpprefix}:$1${serverName}:${port}\\/http\\/$2`, - 'https://([-a-z0-9A-Z.]+)': `${httpprefix}://${serverName}:${port}/https/$1`, - 'http://([-a-z0-9A-Z.]+)': `${httpprefix}://${serverName}:${port}/http/$1`, - 'https%3a%2f%2f([-a-z0-9A-Z]+?)': `${httpprefix}%3a%2f%2f${serverName}%3a${port}%2fhttps%2f$1`, - 'http%3a%2f%2f([-a-z0-9A-Z]+?)': `${httpprefix}%3a%2f%2f${serverName}%3a${port}%2fhttp%2f$1`, - 'https%3A%2F%2F([-a-z0-9A-Z]+?)': `${httpprefix}%3A%2F%2F${serverName}%3A${port}%2Fhttps%2F$1`, - 'http%3A%2F%2F([-a-z0-9A-Z]+?)': `${httpprefix}%3A%2F%2F${serverName}%3A${port}%2Fhttp%2F$1`, + '(http[s]?):(\\\\/)\\\\/([-a-z0-9A-Z])': `${httpprefix}:$2$2${serverName}:${port}$2$1$2$3`, + '(http[s]?)://([-a-z0-9A-Z])': `${httpprefix}://${serverName}:${port}/$1/$2`, + '(http[s]?)(%3[aA])(%2[fF])%2[fF]([-a-z0-9A-Z])': `${httpprefix}$2$3$3${serverName}$2${port}$3$1$3$4`, + '"(http[s]?)://"': `"${httpprefix}://${serverName}:${port}/https/"`, ' integrity=".+?"': '', // remove integrity } @@ -146,7 +141,7 @@ const siteSpecificReplace = { 'href="/https/www.google.com/g(.;)': 'href="/g$1', '[\(]"/url': `\("/https/www.google.com/url`, //s_Gj("/url?sa=t&source=web&rct=j");s_Nj '"/url"': `"/https/www.google.com/url"`, - // 'f="/"[+]f': `f="/https/www.google.com/"\+f`, + 'f="/"[+]f': `f="/https/www.google.com/"\+f`, // mobile next page issue. }, 'www.gstatic.com': { 'href="/https/www.gstatic.com/g(.;)': 'href="/g$1', @@ -234,7 +229,7 @@ const siteSpecificReplace = { 'web.telegram.org': { '"pluto"': `"${serverName}:${port}/https/pluto"`, '"venus"': `"${serverName}:${port}/https/venus"`, - '"aurora"': `"${serverName}:${port}/https/aurora"`, + '"aurora"':`"${serverName}:${port}/https/aurora"`, '"vesta"': `"${serverName}:${port}/https/vesta"`, '"flora"': `"${serverName}:${port}/https/flora"`, ' href=([\"\']?)([-a-z0-9_]+?)': ` href=$1/https/web.telegram.org/$2`, @@ -244,6 +239,12 @@ const siteSpecificReplace = { 'doubibackup.com': { ' href=([\"\']?)([-a-z0-9_]+?)': ` href=$1/https/doubibackup.com/$2`, ' src=("[-a-z0-9_]+?)': ` src=/https/doubibackup.com/$1`, + }, + 'pornhub.com': { + '"/dv.p"([ ]?[+][ ]?"hn")': `"/https/www.pornhub.com/dv.p"[ ]?[+][ ]?"$1`, + }, + 'phncdn.com': { + // '("[:]?//)': `$1${serverName}:${port}/https/`, // default to https } } diff --git a/index.html b/index.html index 82aa0fa..2ef4e0e 100644 --- a/index.html +++ b/index.html @@ -727,7 +727,7 @@

@@ -741,7 +741,7 @@

注意,本网站: - https://siteproxy.netptop.com 一定会被墙,时间未知. 请记下项目开源网址(不会被墙): + https://proxy.netptop.com 一定会被墙,时间未知. 请记下项目开源网址(不会被墙): https://github.com/netptop/siteproxy 以便在本网站失效后能找到新的网址; 或者加入telegram电报群: @siteproxy获取更新.
@@ -773,32 +773,32 @@

@@ -815,47 +815,52 @@

+ @@ -872,17 +877,22 @@

+ @@ -960,7 +970,7 @@

$('#txt').keydown(function(ev){ // 回车键的处理 if(ev.keyCode==13){ - window.open(thisSearch + $('#txt').val(), "_self") + window.open(thisSearch + $('#txt').val(), "_blank") // $('#txt').val(''); $('#box ul').html('') } @@ -1003,7 +1013,7 @@

$("#search-btn").click(function(){ var textValue = $('#txt').val(); if(textValue != ''){ - window.open(thisSearch + textValue, "_self") + window.open(thisSearch + textValue, "_blank") } }); diff --git a/index.js b/index.js index 98aad43..2e95feb 100644 --- a/index.js +++ b/index.js @@ -10,6 +10,9 @@ let proxy = Proxy({ blockedSites, urlModify, httpprefix, serverName, port, cooki const middle1 = (req, res, next) => { let timestr = new Date().toISOString() + let myRe = new RegExp(`/http[s]?/${serverName}.*?/`, 'g') // match group // remove duplicate https/${serverName}:${port}/ + req.url = req.url.replace(myRe, '/') + console.log(`${timestr}: req.url:${req.url}`) const dirPath = path.join(__dirname, req.url) let fwdStr = req.headers['x-forwarded-for'] diff --git a/test/siteproxylocal.test.js b/test/siteproxylocal.test.js index c68a018..107d45f 100644 --- a/test/siteproxylocal.test.js +++ b/test/siteproxylocal.test.js @@ -240,8 +240,6 @@ test('youtube homepage issue', async () => { // ht-time":"0","x-request-start":"1584141848398","total-route-time":"0"} // h2020-03-13T23:24:08.403097+00:00 app[web.1]: httpType:https, host:id.google.com - - test('no content-type field situation', async () => { const url = `${httpprefix}://${serverName}:${port}/https/onetag-sys.com/usync/?pubId=5927d926323dc2c` const response = await axios({ @@ -249,6 +247,8 @@ test('no content-type field situation', async () => { headers: { 'Accept': `text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`, }, + maxRedirects: 0, + validateStatus: null, // important for status 302 url, }) // console.log(`${JSON.stringify(response.headers)}`) @@ -438,7 +438,7 @@ test('youtube href fonts.googleapis.com issue', async () => { const response = await axios({ method: 'get', headers: { - 'Accept': `text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`, + 'Accept': `text/html,application/xhtml+xml,applino content-type fieldcation/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`, }, url, }) @@ -457,6 +457,34 @@ test('youtube watch address bar issue', async () => { url, }) // console.log(`${JSON.stringify(response.headers)}`) - console.log(`${JSON.stringify(response.data)}`) + // console.log(`${JSON.stringify(response.data)}`) expect(JSON.stringify(response.data).indexOf(`{"url":"/watch?v=tTzRY7F_1OU"`)).toBe(-1) }, 15000); // should be done within 3 seconds. + +test('"https://" should be removed', async () => { + const url = `${httpprefix}://${serverName}:${port}/https/di.phncdn.com/www-static/js/ph-tracking.js?cache=2020051402` + const response = await axios({ + method: 'get', + headers: { + 'Accept': `text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`, + }, + url, + }) + // console.log(`${JSON.stringify(response.headers)}`) + // console.log(`${response.data}`) + expect((response.data).indexOf(`"https://"`)).toBe(-1) +}, 15000); // should be done within 3 seconds. + +test('30.toString() should not be existed', async () => { + const url = `${httpprefix}://${serverName}:${port}/https/www.youtube.com/s/desktop/751ee0bc/htdocs-ytimg-desktop-kevlar-production/jsbin/desktop_polymer_inlined_html_polymer_flags_legacy_browsers_v2.vflset/desktop_polymer_inlined_html_polymer_flags_legacy_browsers_v2.js` + const response = await axios({ + method: 'get', + headers: { + 'Accept': `text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9`, + }, + url, + }) + // console.log(`${JSON.stringify(response.headers)}`) + console.log(`${response.data}`) + expect((response.data).indexOf(`30.toString()`)).toBe(-1) +}, 15000); // should be done within 3 seconds.