Skip to content

Commit

Permalink
fixed host parsing issue; fixed the issue that '/https/proxy.netptop.…
Browse files Browse the repository at this point in the history
…com/' exists in url; improved performance by using bodyList; fixed twitter search issue by adding x- headers for twitter only; allow service worker to run at / scope; set cookie expiration to 1 day; fixed the cloudflare wrap issue for www.xvideos.com;
  • Loading branch information
netptop committed May 18, 2020
1 parent be94956 commit 921459a
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 75 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ release
yarn-error.log
yarn.lock
.history
.vscode
.umirc.local.js
.now
.now
31 changes: 31 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
// VS Code debugger launch configurations for this workspace.
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
// Python launch configuration: runs "${file}" with "web-result.txt" as its
// only argument, using the integrated terminal as the console.
{
"name":"Python: Current File",
"type":"python",
"request":"launch",
"program":"${file}",
"args": ["web-result.txt"],
"console":"integratedTerminal"
},
// Node launch configuration: starts index.js from the workspace folder.
{
"type": "node",
"request": "launch",
"name": "Launch Program",
// Paths listed here are skipped while stepping (Node internals).
"skipFiles": [
"<node_internals>/**"
],
// Environment passed to the launched process (PORT and localFlag).
// NOTE(review): "runtimeArgs" and "args" are nested inside "env" here.
// VS Code presumably expects them as siblings of "env" on the
// configuration object, so "--tls-min-v1.0" may not be applied — confirm.
"env": {
"PORT":"8011",
"localFlag": "true",
"runtimeArgs": ["--tls-min-v1.0"],
"args": []
},
"program": "${workspaceFolder}/index.js"
}
]
}
129 changes: 95 additions & 34 deletions Proxy.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var express = require('express');
var proxy = require('http-proxy-middleware');
const zlib = require("zlib")
const fs = require("fs")
const parse = require('url-parse')
const cookiejar = require('cookiejar')
const iconv = require('iconv-lite')
Expand Down Expand Up @@ -28,6 +29,17 @@ var enableCors = function(req, res) {
}
};

/**
 * Append one access record to the CSV stream.
 *
 * The row layout is `date,sourceIP,host,pktLen,url` terminated by a newline.
 * `sourceIP` is the first entry of the comma-separated forwarded-for string.
 * Nothing is written when `fwdStr` is empty or undefined.
 *
 * @param {object} params
 * @param {{write: function(string): *}} params.stream - target that receives the row via `write`.
 * @param {string} [params.fwdStr] - forwarded-for header value (comma-separated IP list).
 * @param {{url: string}} params.req - request whose `url` is recorded.
 * @param {string} params.host - host recorded in the row.
 * @param {number} params.pktLen - body length recorded in the row.
 */
var saveRecord = ({stream, fwdStr, req, host, pktLen}) => {
  if (!fwdStr) {
    return
  }
  // Quote any field holding a comma, double quote or line break so the row
  // always has exactly five columns. toLocaleString() contains a comma in
  // common locales and request URLs may contain arbitrary characters.
  const csvField = (value) => {
    const text = String(value)
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
  }
  // split() always yields at least one element, so index 0 is safe.
  const sourceIP = fwdStr.split(',')[0].trim()
  const row = [new Date().toLocaleString(), sourceIP, host, pktLen, req.url]
  stream.write(`${row.map(csvField).join(',')}\n`)
}

var redirect2HomePage = function({res, httpprefix, serverName,} ) {
try {
res.setHeader('location',`${httpprefix}://${serverName}`)
Expand All @@ -46,14 +58,12 @@ let getHostFromReq = (req) => { //return target
let httpType = 'https'
if (req.url.startsWith(https_prefix)) {
host = req.url.slice(https_prefix.length, req.url.length)
if (host.indexOf('/') !== -1) {
host = host.slice(0, host.indexOf('/'))
}
let hosts = host.match(/[-a-z0-9A-Z]+\.[-a-z0-9A-Z.]+/g);
host = hosts.length>0?hosts[0]:''
} else if (req.url.startsWith(http_prefix)) {
host = req.url.slice(http_prefix.length, req.url.length)
if (host.indexOf('/') !== -1) {
host = host.slice(0, host.indexOf('/'))
}
let hosts = host.match(/[-a-z0-9A-Z]+\.[-a-z0-9A-Z.]+/g);
host = hosts.length>0?hosts[0]:''
httpType = 'http'
} else if (req.headers['referer'] && req.headers['referer'].indexOf('https/') !== -1) {
let start = req.headers['referer'].indexOf('https/') + 6
Expand Down Expand Up @@ -90,8 +100,8 @@ let getHostFromReq = (req) => { //return target


let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomainRewrite, locationReplaceMap302, regReplaceMap, siteSpecificReplace, pathReplace}) => {
let stream = fs.createWriteStream("web-records.csv", {flags:'a'})
let handleRespond = ({req, res, body, gbFlag}) => {
// logSave("res from proxied server:", body);
let myRe
let {host, httpType} = getHostFromReq(req)
let location = res.getHeaders()['location']
Expand All @@ -113,15 +123,14 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
}
logSave(`##### host:${host}`)
if (host) {
body = pathReplace({host, httpType, body})
body = pathReplace({host, httpType, body}) //13ms
}
// remove duplicate /https/siteproxylocal.now.sh:443
myRe = new RegExp(`/${httpprefix}/${serverName}.*?/`, 'g') // match group
body = body.replace(myRe, '/')

logSave(`2`)
logSave(`3`)
myRe = new RegExp(`/${httpType}/${host}/${httpType}/${host}/`, 'g') // match group
body = body.replace(myRe, `/${httpType}/${host}/`)

logSave(`4`) //1ms
// put siteSpecificReplace at end
Object.keys(siteSpecificReplace).forEach( (site) => {
if (!req.url) {
Expand All @@ -134,8 +143,9 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
body = body.replace(myRe, siteSpecificReplace[site][key])
})
}
})
}) //17ms

logSave(`5`)
if (gbFlag) {
body = iconv.encode(body, 'gbk')
}
Expand All @@ -144,14 +154,15 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
// need to manually redirect it for youtube workaround.
console.log(`============== redirect googlevideo.com`)
try {
res.setHeader('location', body)
res.setHeader('location', body) //0ms
} catch(e) {
logSave(`error: ${e}`)
return
}
res.statusCode = '302'
}
body = zlib.gzipSync(body)
logSave(`6`)
body = zlib.gzipSync(body) //19ms
try {
res.setHeader('content-encoding', 'gzip');
logSave(`handleRespond: res.statusCode:${res.statusCode}, res.headers:${JSON.stringify(res.getHeaders())}`)
Expand All @@ -167,7 +178,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
}
// only support https for now.
const router = (req) => { //return target
let myRe = new RegExp(`/${httpprefix}/${serverName}.*?/`, 'g') // match group
let myRe = new RegExp(`/http[s]?/${serverName}.*?/`, 'g') // match group
req.url = req.url.replace(myRe, '/')

let {host, httpType} = getHostFromReq(req)
Expand All @@ -189,6 +200,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
*/
// hostRewrite: true,
// autoRewrite: true,
// proxyTimeout: 15000, // 10 seconds
protocolRewrite: true,
// followRedirects: true,
cookieDomainRewrite,
Expand All @@ -211,11 +223,28 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
onProxyRes: (proxyRes, req, res) => {
let {host, httpType} = getHostFromReq(req)
logSave(`proxyRes.status:${proxyRes.statusCode} proxyRes.headers:${JSON.stringify(proxyRes.headers)}`)
let body = Buffer.from('');
let bodyList = []
let bodyLength = 0
let endFlag = false
proxyRes.on('data', function(data) {
body = Buffer.concat([body, data]);
// body = Buffer.concat([body, data]);
if (endFlag === true) {
return // don't have to push it to bodyList
}
bodyLength += data.length
bodyList.push(data)
if (res.getHeader('content-type') && res.getHeader('content-type').indexOf('video') !== -1) {
let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for']
if ((host.indexOf('cdn') !== -1 && bodyLength >= 105000000) ||
(host.indexOf('cdn') === -1 && bodyLength >= 2500000)) {
}
}
})
proxyRes.on('end', function() {
if (endFlag === true) {
return
}
let body = Buffer.concat(bodyList)
let gbFlag = false
if (proxyRes.headers["content-encoding"] === 'gzip' ||
proxyRes.headers["content-encoding"] === 'br') {
Expand Down Expand Up @@ -245,29 +274,42 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
logSave(`utf-8 text...`)
let originBody = gunzipped
body = gunzipped.toString('utf-8');
if (body.indexOf('="text/html; charset=gb') !== -1 ||
body.indexOf(' charset="gb') !== -1 ||
body.indexOf('=\'text/html; charset=gb') !== -1) {
let searchBody = body.slice(0, 1000)
if (searchBody.indexOf('="text/html; charset=gb') !== -1 ||
searchBody.indexOf(' charset="gb') !== -1 ||
searchBody.indexOf('=\'text/html; charset=gb') !== -1) {
logSave(`gb2312 found...`)
body = iconv.decode(originBody, 'gbk')
gbFlag = true
}
let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for'] || ''
if (proxyRes.statusCode === 200 && proxyRes.headers["content-type"] &&
proxyRes.headers["content-type"].indexOf('text/html') !== -1) {
saveRecord({stream, fwdStr, req, host, pktLen:body.length})
}
if (proxyRes.statusCode === 200 && req.url.indexOf('/sw.js') !== -1) {
// fetching sw.js
res.setHeader('service-worker-allowed','/')
}
handleRespond({req, res, body, gbFlag})
} else {
// console.log(`2========>${logGet()}`)
let key = "content-encoding"
if(key in proxyRes.headers) {
res.setHeader(key, proxyRes.headers[key]);
try {
let key = "content-encoding"
if(key in proxyRes.headers) {
res.setHeader(key, proxyRes.headers[key]);
}
logSave(`2: res.headers:${JSON.stringify(res.getHeaders())}`)
if (req.headers['debugflag']==='true') {
res.removeHeader('content-encoding')
res.setHeader('content-type','text/plain')
body=logGet()
}
res.end(body)
} catch(e) {
console.log(`error:${e}`)
}
logSave(`2: res.headers:${JSON.stringify(res.getHeaders())}`)
if (req.headers['debugflag']==='true') {
res.removeHeader('content-encoding')
res.setHeader('content-type','text/plain')
body=logGet()
}
res.end(body)
}

} else if (proxyRes.statusCode === 301 || proxyRes.statusCode === 302 || proxyRes.statusCode === 307 || proxyRes.statusCode === 308 ||
(proxyRes.headers["content-type"] &&
(proxyRes.headers["content-type"].indexOf('text/') !== -1 ||
Expand All @@ -291,16 +333,28 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
res.setHeader('content-type','text/plain')
body=logGet()
}
if (res.getHeader('content-type') && res.getHeader('content-type').indexOf('video') !== -1) {
let fwdStr = req.headers['X-Forwarded-For'] || req.headers['x-forwarded-for']
console.log(`route:${fwdStr}, length:${bodyLength}, ${host}`)
}
res.end(body)
}
})
const setCookieHeaders = proxyRes.headers['set-cookie'] || []
let datestr = ''
if (setCookieHeaders.length > 0) {
let date = new Date
date.setDate(date.getDate() + 1) // 一天之后过期
datestr = date.toUTCString()
}
const modifiedSetCookieHeaders = setCookieHeaders
.map(str => new cookiejar.Cookie(str))
.map(cookie => {
logSave(`cookie:${JSON.stringify(cookie)}`)
if (cookie.path && cookie.path[0] === '/') {
cookie.domain = `${serverName}`
cookie.expiration_date = datestr
cookie.path = `/${httpType}/${host}${cookie.path}`
}
cookie.secure = false
return cookie
Expand Down Expand Up @@ -352,7 +406,7 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
}
}
let timestr = new Date().toISOString()
console.log(`[${timestr}] route:${fwdStr}, httpType:${httpType}, host:${host}`)
console.log(`route:${fwdStr}, httpType:${httpType}, host:${host}`)
if (host.indexOf(serverName) !== -1 || // we cannot request resource from proxy itself
host == '' || host.indexOf('.') === -1 || (fwdStr && fwdStr.split(',').length > 3)) { // too many forwardings
res.status(404).send("{}")
Expand All @@ -368,9 +422,16 @@ let Proxy = ({blockedSites, urlModify, httpprefix, serverName, port, cookieDomai
let newpath = req.url.replace(`/${httpType}/${host}`, '') || '/'
logSave(`httpType:${httpType}, host:${host}, req.url:${req.url}, req.headers:${JSON.stringify(req.headers)}`)
Object.keys(req.headers).forEach(function (key) {
if (key.indexOf('x-') === 0) {
// remove nginx/cloudflare/pornhub related headers
if ((host.indexOf('twitter.com') === -1 && key.indexOf('x-') === 0) ||
key.indexOf('sec-fetch') === 0 ||
key.indexOf('cf-') === 0) {
logSave(`remove key=${key},`)
proxyReq.removeHeader(key)
if (key === 'sec-fetch-mode') {
proxyReq.setHeader('sec-fetch-mode', 'cors')
}
return
}
logSave(`set key=${key},`)
proxyReq.setHeader(key, req.headers[key])
Expand Down
27 changes: 14 additions & 13 deletions config.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const {CookieAccessInfo, CookieJar, Cookie} = cookiejar

let config = {
httpprefix: 'https', port: 443,
serverName: 'siteproxy.netptop.com',
serverName: 'proxy.netptop.com',
}
let blockedSites = ['merlinblog.xyz']

Expand Down Expand Up @@ -64,17 +64,12 @@ const locationReplaceMap302 = ({location, serverName, httpprefix, host, httpType
}

const regReplaceMap = {
'"//([-a-z0-9A-Z.]+)': `"//${serverName}:${port}/https/$1`, // default use https
'\'//([-a-z0-9A-Z.]+)': `'//${serverName}:${port}/https/$1`,// default use https
'(["\'])//([-a-z0-9A-Z.]+)': `$1//${serverName}:${port}/https/$2`, // default use https
'url[(]//([-a-z0-9A-Z.]+)': `url(//${serverName}:${port}/https/$1`,// default use https
'https:(././)([-a-z0-9A-Z.]+)': `${httpprefix}:$1${serverName}:${port}\\/https\\/$2`,
'http:(././)([-a-z0-9A-Z.]+)': `${httpprefix}:$1${serverName}:${port}\\/http\\/$2`,
'https://([-a-z0-9A-Z.]+)': `${httpprefix}://${serverName}:${port}/https/$1`,
'http://([-a-z0-9A-Z.]+)': `${httpprefix}://${serverName}:${port}/http/$1`,
'https%3a%2f%2f([-a-z0-9A-Z]+?)': `${httpprefix}%3a%2f%2f${serverName}%3a${port}%2fhttps%2f$1`,
'http%3a%2f%2f([-a-z0-9A-Z]+?)': `${httpprefix}%3a%2f%2f${serverName}%3a${port}%2fhttp%2f$1`,
'https%3A%2F%2F([-a-z0-9A-Z]+?)': `${httpprefix}%3A%2F%2F${serverName}%3A${port}%2Fhttps%2F$1`,
'http%3A%2F%2F([-a-z0-9A-Z]+?)': `${httpprefix}%3A%2F%2F${serverName}%3A${port}%2Fhttp%2F$1`,
'(http[s]?):(\\\\/)\\\\/([-a-z0-9A-Z])': `${httpprefix}:$2$2${serverName}:${port}$2$1$2$3`,
'(http[s]?)://([-a-z0-9A-Z])': `${httpprefix}://${serverName}:${port}/$1/$2`,
'(http[s]?)(%3[aA])(%2[fF])%2[fF]([-a-z0-9A-Z])': `${httpprefix}$2$3$3${serverName}$2${port}$3$1$3$4`,
'"(http[s]?)://"': `"${httpprefix}://${serverName}:${port}/https/"`,
' integrity=".+?"': '', // remove integrity
}

Expand Down Expand Up @@ -146,7 +141,7 @@ const siteSpecificReplace = {
'href="/https/www.google.com/g(.;)': 'href="/g$1',
'[\(]"/url': `\("/https/www.google.com/url`, //s_Gj("/url?sa=t&source=web&rct=j");s_Nj
'"/url"': `"/https/www.google.com/url"`,
// 'f="/"[+]f': `f="/https/www.google.com/"\+f`,
'f="/"[+]f': `f="/https/www.google.com/"\+f`, // mobile next page issue.
},
'www.gstatic.com': {
'href="/https/www.gstatic.com/g(.;)': 'href="/g$1',
Expand Down Expand Up @@ -234,7 +229,7 @@ const siteSpecificReplace = {
'web.telegram.org': {
'"pluto"': `"${serverName}:${port}/https/pluto"`,
'"venus"': `"${serverName}:${port}/https/venus"`,
'"aurora"': `"${serverName}:${port}/https/aurora"`,
'"aurora"':`"${serverName}:${port}/https/aurora"`,
'"vesta"': `"${serverName}:${port}/https/vesta"`,
'"flora"': `"${serverName}:${port}/https/flora"`,
' href=([\"\']?)([-a-z0-9_]+?)': ` href=$1/https/web.telegram.org/$2`,
Expand All @@ -244,6 +239,12 @@ const siteSpecificReplace = {
'doubibackup.com': {
' href=([\"\']?)([-a-z0-9_]+?)': ` href=$1/https/doubibackup.com/$2`,
' src=("[-a-z0-9_]+?)': ` src=/https/doubibackup.com/$1`,
},
'pornhub.com': {
'"/dv.p"([ ]?[+][ ]?"hn")': `"/https/www.pornhub.com/dv.p"[ ]?[+][ ]?"$1`,
},
'phncdn.com': {
// '("[:]?//)': `$1${serverName}:${port}/https/`, // default to https
}
}

Expand Down
Loading

0 comments on commit 921459a

Please sign in to comment.