看得透又看得远者prevail. ppt.cc/flUmLx ppt.cc/fqtgqx ppt.cc/fZsXUx ppt.cc/fhWnZx ppt.cc/fnrkVx ppt.cc/f2CBVx: GFWList 兼容 Squid

Thursday, 25 February 2016

GFWList 兼容 Squid

这两天在玩 Squid，一个功能颇为丰富的网络代理（缓存）软件。
GFWList 是由 AutoProxy 维护的一个列表，顾名思义。

Squid 通过 ACL 为每个请求分类，并据此控制行为。与目的网站相关的 ACL 类型有：dstdomain（目标域名）、dstdom_regex（目标域名，正则表达式）和 dst_as（目标 AS 号）等，具体用法见官方文档。

GFWList 使用的是 Adblock 一样的格式，给它全转成正则表达式，然后使用dstdom_regex匹配就好了。写了个转换脚本（gfwlist2regex.py），运行后自动下载转换生成黑白名单，方便日后更新。

#!/usr/bin/env python
#encoding: utf-8
import urllib2
from base64 import b64decode


# URL that main() downloads; the response body is base64-decoded before use.
# NOTE(review): this points at Google Code SVN hosting -- confirm the URL is
# still reachable before relying on it.
LIST_URL   = 'https://autoproxy-gfwlist.googlecode.com/svn/trunk/gfwlist.txt'
# Output file for converted rules that do NOT start with '@@' (blacklist).
BLACK_FILE = 'gfw.url_regex.lst'
# Output file for converted rules that start with '@@' (whitelist).
WHITE_FILE = 'cn.url_regex.lst'

def convert_line(line):
    """Translate one Adblock-style GFWList rule into a regular expression.

    Supported rule shapes:

    * ``/regex/``  -- already a regex; the surrounding slashes are removed.
    * ``||host``   -- anchored right after an ``http://`` or ``https://``
      scheme.
    * ``|prefix``  -- anchored at the start of the URL.
    * ``suffix|``  -- anchored at the end of the URL.
    * anything else is matched anywhere in the URL.

    ``*`` is a wildcard and becomes ``.+``; other regex metacharacters in
    the rule are escaped so they match literally.

    Args:
        line: a single rule with comments and any ``@@`` prefix already
            removed.

    Returns:
        The regex as a string, or ``''`` when ``line`` is empty.
    """
    if not line:
        # Nothing to convert; avoids IndexError on line[0] below.
        return ''

    if line[0] == '/' and line[-1] == '/':
        return line[1:-1]

    # Escape literal metacharacters first, so the '.+' inserted for the
    # wildcard below is not itself escaped.
    for meta in '.?+()[]{}':
        line = line.replace(meta, '\\' + meta)
    line = line.replace('*', '.+')

    if line.startswith('||'):
        return r'^https?:\/\/%s.*' % line[2:]
    elif line.startswith('|'):
        return '^%s.*' % line[1:]
    elif line.endswith('|'):
        # Drop the anchor marker itself: a '|' left in the regex is an
        # alternation with '$' and would match every input.
        return '.*%s$' % line[:-1]
    else:
        return '.*%s.*' % line

        
def convert(gfwlist):
    """Write the rules in decoded GFWList text to the two regex list files.

    Rules prefixed with ``@@`` are exceptions and go to ``WHITE_FILE``; all
    other rules go to ``BLACK_FILE``.  Blank lines, ``!`` comments and the
    ``[...]`` header line are skipped.  Both files are overwritten.

    Args:
        gfwlist: the decoded list as one string, one rule per line.
    """
    # 'with' closes (and therefore flushes) both files even if a rule fails
    # to convert part-way through.
    with open(BLACK_FILE, 'w') as black, open(WHITE_FILE, 'w') as white:
        for raw in gfwlist.splitlines():
            # Strip whitespace / a stray '\r' instead of blindly dropping
            # the last character, which corrupted rules in LF-terminated
            # input.
            rule = raw.strip()
            if not rule or rule.startswith(('!', '[')):
                continue

            if rule.startswith('@@'):
                target, rule = white, rule[2:]
                if not rule:
                    # A bare '@@' carries no pattern.
                    continue
            else:
                target = black
            target.write(convert_line(rule) + '\n')

            
def main():
    """Download the list from LIST_URL, base64-decode it and convert it."""
    response = urllib2.urlopen(LIST_URL)
    try:
        encoded = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    convert(b64decode(encoded))


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

view raw gfwlist2regex.py hosted with

by GitHub

修改 Squid 设置，例如这样：

# ...
acl cn    dstdom_regex 'cn.url_regex.lst'
acl gfwed dstdom_regex 'gfw.url_regex.lst'
 
prefer_direct on
  # 默认直连
never_direct  allow gfwed
never_direct  deny  cn
  # 白名单直连失败将尝试使用代理；如使用
  # always_direct allow cn 将禁止白名单使用代理。
 
cache_peer localhost parent 1234 0 name=s1 weight=5
cache_peer localhost parent 4321 0 name=s2 weight=10
# ...

黑名单里的必须走上级代理（cache_peer）；其余先尝试直接访问，如果被重置，将会转而尝试上级代理。挺智能的。

from https://blog.sorz.org/p/squid-blacklist/

	#!/usr/bin/env python
	#encoding: utf-8
	import urllib2
	from base64 import b64decode


	# URL that main() downloads; the response body is base64-decoded before use.
	# NOTE(review): this points at Google Code SVN hosting -- confirm the URL is
	# still reachable before relying on it.
	LIST_URL = 'https://autoproxy-gfwlist.googlecode.com/svn/trunk/gfwlist.txt'
	# Output file for converted rules that do NOT start with '@@' (blacklist).
	BLACK_FILE = 'gfw.url_regex.lst'
	# Output file for converted rules that start with '@@' (whitelist).
	WHITE_FILE = 'cn.url_regex.lst'

	def convert_line(line):
	    """Translate one Adblock-style GFWList rule into a regular expression.

	    Supported rule shapes:

	    * ``/regex/``  -- already a regex; the surrounding slashes are removed.
	    * ``||host``   -- anchored right after an ``http://`` or ``https://``
	      scheme.
	    * ``|prefix``  -- anchored at the start of the URL.
	    * ``suffix|``  -- anchored at the end of the URL.
	    * anything else is matched anywhere in the URL.

	    ``*`` is a wildcard and becomes ``.+``; other regex metacharacters in
	    the rule are escaped so they match literally.

	    Args:
	        line: a single rule with comments and any ``@@`` prefix already
	            removed.

	    Returns:
	        The regex as a string, or ``''`` when ``line`` is empty.
	    """
	    if not line:
	        # Nothing to convert; avoids IndexError on line[0] below.
	        return ''

	    if line[0] == '/' and line[-1] == '/':
	        return line[1:-1]

	    # Escape literal metacharacters first, so the '.+' inserted for the
	    # wildcard below is not itself escaped.
	    for meta in '.?+()[]{}':
	        line = line.replace(meta, '\\' + meta)
	    line = line.replace('*', '.+')

	    # The anchor markers are plain '|' characters; a backslash-escaped
	    # form would never match a real rule.
	    if line.startswith('||'):
	        return r'^https?:\/\/%s.*' % line[2:]
	    elif line.startswith('|'):
	        return '^%s.*' % line[1:]
	    elif line.endswith('|'):
	        # Drop the anchor marker itself: a '|' left in the regex is an
	        # alternation with '$' and would match every input.
	        return '.*%s$' % line[:-1]
	    else:
	        return '.*%s.*' % line


	def convert(gfwlist):
	    """Write the rules in decoded GFWList text to the two regex list files.

	    Rules prefixed with ``@@`` are exceptions and go to ``WHITE_FILE``; all
	    other rules go to ``BLACK_FILE``.  Blank lines, ``!`` comments and the
	    ``[...]`` header line are skipped.  Both files are overwritten.

	    Args:
	        gfwlist: the decoded list as one string, one rule per line.
	    """
	    # 'with' closes (and therefore flushes) both files even if a rule fails
	    # to convert part-way through.
	    with open(BLACK_FILE, 'w') as black, open(WHITE_FILE, 'w') as white:
	        for raw in gfwlist.splitlines():
	            # Strip whitespace / a stray '\r' instead of blindly dropping
	            # the last character, which corrupted rules in LF-terminated
	            # input.
	            rule = raw.strip()
	            if not rule or rule.startswith(('!', '[')):
	                continue

	            if rule.startswith('@@'):
	                target, rule = white, rule[2:]
	                if not rule:
	                    # A bare '@@' carries no pattern.
	                    continue
	            else:
	                target = black
	            target.write(convert_line(rule) + '\n')


	def main():
	    """Download the list from LIST_URL, base64-decode it and convert it."""
	    response = urllib2.urlopen(LIST_URL)
	    try:
	        encoded = response.read()
	    finally:
	        # Release the connection even when the read fails.
	        response.close()
	    convert(b64decode(encoded))


	# Run the conversion only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()

Total Pageviews

Thursday, 25 February 2016

GFWList 兼容 Squid