Repository: cyfdecyf/cow Branch: master Commit: 41c0fb157c8b Files: 54 Total size: 228.4 KB Directory structure: gitextract_ox61ez_z/ ├── .gitignore ├── .travis.yml ├── CHANGELOG ├── LICENSE ├── README-en.md ├── README.md ├── auth.go ├── auth_test.go ├── config.go ├── config_test.go ├── config_unix.go ├── config_windows.go ├── conn_pool.go ├── conn_pool_test.go ├── doc/ │ ├── implementation.md │ ├── init.d/ │ │ └── cow │ ├── logrotate.d/ │ │ └── cow │ ├── osx/ │ │ └── info.chenyufei.cow.plist │ └── sample-config/ │ ├── rc │ └── rc-en ├── error.go ├── estimate_timeout.go ├── http.go ├── http_test.go ├── install-cow.sh ├── log.go ├── main.go ├── main_unix.go ├── main_windows.go ├── pac.go ├── pac.js ├── parent_proxy.go ├── proxy.go ├── proxy_test.go ├── proxy_unix.go ├── proxy_windows.go ├── script/ │ ├── README.md │ ├── build.sh │ ├── debugrc │ ├── httprc │ ├── log-group-by-client.sh │ ├── set-version.sh │ ├── test.sh │ └── upload.sh ├── site_blocked.go ├── site_direct.go ├── sitestat.go ├── sitestat_test.go ├── ssh.go ├── stat.go ├── testdata/ │ └── file ├── timeoutset.go ├── util.go └── util_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.sublime* cow-proxy bin ================================================ FILE: .travis.yml ================================================ language: go go: - 1.4.2 env: - TRAVIS="yes" install: - go get github.com/shadowsocks/shadowsocks-go/shadowsocks - go get github.com/cyfdecyf/bufio - go get github.com/cyfdecyf/leakybuf - go get github.com/cyfdecyf/color script: - pushd $TRAVIS_BUILD_DIR - go test -v - ./script/test.sh - popd ================================================ FILE: CHANGELOG ================================================ 0.9.8 (2016-06-19) * Fix OTA support bug in shadowsocks (report by @defia) * Fix WeChat image url problem 
(by @breath-co2 @haha1903) * Fix connection reset detection (by @fgid) 0.9.7 (2016-05-04) * Support shadowsocks OTA 0.9.6 (2015-06-07) * Reload config by sending SIGUSR1 on Unix system * Load blocked/direct/stat file from same directory as rc file by default * Allow user to specify blocked/direct/stat file path * Detect arm without vfp in install script. * Fix estimate timeout bug 0.9.5 (2015-05-12) * Support new encryption method "chacha20" and "salsa20" * Avoid biased parent proxy selection for hash load balancing * Fix AirDrop on OS X when using PAC * Fix failed start with corrupted stat file * Support changing the estimate timeout target 0.9.4 (2014-10-08) * Bug fix (#179): close stat file after load 0.9.3 (2014-09-21) * Support new encryption method "rc4-md5" 0.9.2 (2014-07-23) * Reduce the possibility of encountering too many open file error * New connection latency based load balancing * Fix auto load plist for OS X * Identify blocked site by HTTP error code 0.9.1 (2013-12-20) * Fix can't save site stat bug * Improve install and startup script 0.9 (2013-12-02) * New feature: two COW servers can be connected using encrypted connection, thus we have an encrypted HTTP proxy chain that can be used to bypass the firewall * Allow client to use HTTP basic authentication * Simplify configuration syntax * Better reuse for HTTP parent connections * Reduce direct/blocked delta * Generate new PAC every minute 0.8 (2013-08-10) * Share server connections between different clients * Add tunnelAllowedPort option to limit ports CONNECT method can connect to * Avoid timeout too soon for frequently visited direct sites * Fix reporting malformed requests in two cases when request has body: - Authenticate requests - Error occurred before request is sent * Support multi-lined headers * Change client connection timeout to 15s * Change as direct delta to 15 * Provide ARMv5 binary 0.7.6 (2013-07-28) * Fix bug for close connection response with no body * Fix response not keep alive by 
default * Always try parent proxy upon DNS/connection error * Do not take special handling on log with debug option * Add proxy status statistics in debug code 0.7.5 (2013-07-25) * Fix crash on IPv6 client authentication * Provide ARMv6 binary 0.7.4 (2013-07-15) * Fix adding extra connection header for client request with both "Proxy-Connection" and "Connection" headers * Ignore UTF-8 BOM in config file 0.7.3 (2013-07-10) * Handle 100-continue: do not forward expect header from client, ignore 100 continue response replied by some web servers * For windows: add cow-hide.exe to run cow.exe as background process, (provided by xupefei) * Filter sites covered by user specified domains on load * Fix incorrectly changing header value to lower case: user name and password can now contain upper case letters 0.7.2 (2013-07-01) * Close idle server connections earlier: avoid opening too many sockets * Support authenticating multiple users (can limit port for each user) 0.7.1 (2013-06-08) * Fix parent proxy fallback bug 0.7 (2013-06-07) * Always use direct connection only for private IP addresses * Support multiple HTTP/SOCKS5 parent proxies * Support running multiple ssh server * Fix client request read timeout handling * Refactor parent proxy related code 0.6.3 (2013-05-27) * Support more shadowsocks encryption method * Fix several windows network error detection issues (dirty hack) 0.6.2 (2013-05-17) * Support multiple shadowsocks servers * Simple load balancing: backup or hash strategy * PAC fix: do not add domains with blocked host/sub domain * Remove some no longer working command line options 0.6.1 (2013-03-14) * Avoid using too much memory to hold http requests * Support http parent proxy basic authentication * For windows: add cow-taskbar.exe to hide cmd window to status area * Fix timeout error detection * Some bug fixes 0.6 (2013-03-03) * Allow user to specify proxy address in PAC * Performance optimization * More tolerant with HTTP servers and clients * Some bug 
fixes 0.5.1 (2013-02-10) * Handle blocked site that will return EOF * Small bug fixes 0.5 (2013-02-07) * Support parent HTTP proxy (such as goagent) * Work more automatically: because of this, updateBlocked, updateDirect, autoRetry options and chou file are removed * Record direct/blocked visit count to make blocked/direct site handling more reliable * Builtin common blocked/direct site list * Periodically estimate timeout value to avoid considering direct site as blocked with bad network connection * Support specifying host in blocked/direct file * User configurable timeout * Better windows support: connection reset, timeout and DNS error detection tested and works on XP * Support listening multiple addresses * Support IP based and user password authentication * Various bug fixes 0.3.5 (2012-12-23) * Performance improvement by better buffer usage * Allow specifying config file on command line * Better windows support: Config and domain list file on windows are put in the same directory as COW's binary. And they all have txt extension for easy editing * Bug fix: convert HTTP/1.0 response to HTTP/1.1 0.3.4 (2012-12-09) * Support shadowsocks * Reduce latency (maybe just a little, not measured) * Allow specifying ssh server port in config file * Bug fix: crash when handling flush error * Bug fix: correctly handle web servers which use closed connection to indicate end of response 0.3.3 (2012-12-05) * Keep HTTP CONNECT connection open. Avoid problems for Application which uses long connection. * Bug fix: crash when printing domain list inconsistency message. ================================================ FILE: LICENSE ================================================ Copyright (c) 2012-2013 Chen Yufei. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README-en.md ================================================ # COW (Climb Over the Wall) proxy COW is a HTTP proxy to simplify bypassing the great firewall. It tries to automatically identify blocked websites and only use parent proxy for those sites. 
Current version: 0.9.8 [CHANGELOG](CHANGELOG) [![Build Status](https://travis-ci.org/cyfdecyf/cow.png?branch=master)](https://travis-ci.org/cyfdecyf/cow) ## Features - As a HTTP proxy, can be used by mobile devices - Supports HTTP, SOCKS5, [shadowsocks](https://github.com/clowwindy/shadowsocks/wiki/Shadowsocks-%E4%BD%BF%E7%94%A8%E8%AF%B4%E6%98%8E) and COW itself as parent proxy - Supports simple load balancing between multiple parent proxies - Automatically identify blocked websites, only use parent proxy for those sites - Generate and serve PAC file for browser to bypass COW for best performance - Contain domains that can be directly accessed (recorded according to your visit history) # Quickstart Install: - **OS X, Linux (x86, ARM):** Run the following command (also for update) curl -L git.io/cow | bash - All binaries are compiled on OS X, if ARM binary can't work, please download [Go ARM](https://storage.googleapis.com/golang/go1.6.2.linux-amd64.tar.gz) and install from source. - **Windows:** download from the [release page](https://github.com/cyfdecyf/cow/releases) - If you are familiar with Go, run `go get github.com/cyfdecyf/cow` to install from source. Modify configuration file `~/.cow/rc` (OS X or Linux) or `rc.txt` (Windows). A simple example with the most important options: # Line starting with # is comment and will be ignored # Local proxy listen address listen = http://127.0.0.1:7777 # SOCKS5 parent proxy proxy = socks5://127.0.0.1:1080 # HTTP parent proxy proxy = http://127.0.0.1:8080 proxy = http://user:password@127.0.0.1:8080 # shadowsocks parent proxy proxy = ss://aes-128-cfb:password@1.2.3.4:8388 # cow parent proxy proxy = cow://aes-128-cfb:password@1.2.3.4:8388 See [detailed configuration example](doc/sample-config/rc-en) for other features. The PAC file can be accessed at `http://<listen address>/pac`, for the above example: `http://127.0.0.1:7777/pac`. 
Command line options can override options in the configuration file. For more details, see the output of `cow -h` ## Blocked and directly accessible sites list In ideal situation, you don't need to specify which sites are blocked and which are not, but COW hasn't reached that goal. So you may need to manually specify this if COW made the wrong judgement. - `<dir containing rc file>/blocked` for blocked sites - `<dir containing rc file>/direct` for directly accessible sites - One line for each domain - `google.com` means `*.google.com` - You can use domains like `google.com.hk` # Technical details ## Visited site recording COW records all visited hosts and visit count in `stat` (which is a json file) under the same directory with config file. - **For unknown site, first try direct access, use parent proxy upon failure. After 2 minutes, try direct access again** - Builtin [common blocked site](site_blocked.go) in order to reduce time to discover blockage and then use parent proxy - Hosts will be put into PAC after a few times of successful direct visit - Hosts will use parent proxy if direct access failed for a few times - To avoid mistakes, will try direct access with some probability - Host will be deleted if not visited for a few days - Hosts under builtin/manually specified blocked and direct domains will not appear in `stat` ## How does COW detect blocked sites Upon the following error, one domain is considered to be blocked - Server connection reset - Connection to server timeout - Read from server timeout COW will retry HTTP request upon these errors, but if there's some data sent back to the client, connection with the client will be dropped to signal error. Server connection reset is usually reliable in detecting blocked sites. But timeout is not. COW tries to estimate timeout value every 30 seconds, in order to avoid considering normal sites as blocked when network condition is bad. Revert to direct access after two minutes upon first blockage is also to avoid mistakes. 
If automatic timeout retry causes problems for you, try to change `readTimeout` and `dialTimeout` in configuration. # Limitations - No caching, COW just passes traffic between clients and web servers - For web browsing, browsers have their own cache - Blocked site detection is not always reliable # Acknowledgements Refer to [README.md](README.md). ================================================ FILE: README.md ================================================ # COW (Climb Over the Wall) proxy COW 是一个简化穿墙的 HTTP 代理服务器。它能自动检测被墙网站,仅对这些网站使用二级代理。 [English README](README-en.md). 当前版本:0.9.8 [CHANGELOG](CHANGELOG) [![Build Status](https://travis-ci.org/cyfdecyf/cow.png?branch=master)](https://travis-ci.org/cyfdecyf/cow) **欢迎在 develop branch 进行开发并发送 pull request :)** ## 功能 COW 的设计目标是自动化,理想情况下用户无需关心哪些网站无法访问,可直连网站也不会因为使用二级代理而降低访问速度。 - 作为 HTTP 代理,可提供给移动设备使用;若部署在国内服务器上,可作为 APN 代理 - 支持 HTTP, SOCKS5, [shadowsocks](https://github.com/clowwindy/shadowsocks/wiki/Shadowsocks-%E4%BD%BF%E7%94%A8%E8%AF%B4%E6%98%8E) 和 cow 自身作为二级代理 - 可使用多个二级代理,支持简单的负载均衡 - 自动检测网站是否被墙,仅对被墙网站使用二级代理 - 自动生成包含直连网站的 PAC,访问这些网站时可绕过 COW - 内置[常见可直连网站](site_direct.go),如国内社交、视频、银行、电商等网站(可手工添加) # 快速开始 安装: - **OS X, Linux (x86, ARM):** 执行以下命令(也可用于更新) curl -L git.io/cow | bash - 环境变量 `COW_INSTALLDIR` 可以指定安装的路径,若该环境变量不是目录则询问用户 - 所有 binary 在 OS X 上编译获得,若 ARM 版本可能无法工作,请下载 [Go ARM](https://storage.googleapis.com/golang/go1.6.2.linux-amd64.tar.gz) 后从源码安装 - **Windows:** 从 [release 页面](https://github.com/cyfdecyf/cow/releases)下载 - 熟悉 Go 的用户可用 `go get github.com/cyfdecyf/cow` 从源码安装 编辑 `~/.cow/rc` (Linux) 或 `rc.txt` (Windows),简单的配置例子如下: #开头的行是注释,会被忽略 # 本地 HTTP 代理地址 # 配置 HTTP 和 HTTPS 代理时请填入该地址 # 若配置代理时有对所有协议使用该代理的选项,且你不清楚此选项的含义,请勾选 # 或者在自动代理配置中填入 http://127.0.0.1:7777/pac listen = http://127.0.0.1:7777 # SOCKS5 二级代理 proxy = socks5://127.0.0.1:1080 # HTTP 二级代理 proxy = http://127.0.0.1:8080 proxy = http://user:password@127.0.0.1:8080 # shadowsocks 二级代理 proxy = ss://aes-128-cfb:password@1.2.3.4:8388 # cow 二级代理 proxy = 
cow://aes-128-cfb:password@1.2.3.4:8388 使用 cow 协议的二级代理需要在国外服务器上安装 COW,并使用如下配置: listen = cow://aes-128-cfb:password@0.0.0.0:8388 完成配置后启动 COW 并配置好代理即可使用。 # 详细使用说明 配置文件在 Unix 系统上为 `~/.cow/rc`,Windows 上为 COW 所在目录的 `rc.txt` 文件。 **[样例配置](doc/sample-config/rc) 包含了所有选项以及详细的说明**,建议下载然后修改。 启动 COW: - Unix 系统在命令行上执行 `cow &` (若 COW 不在 `PATH` 所在目录,请执行 `./cow &`) - [Linux 启动脚本](doc/init.d/cow),如何使用请参考注释(Debian 测试通过,其他 Linux 发行版应该也可使用) - Windows - 双击 `cow-taskbar.exe`,隐藏到托盘执行 - 双击 `cow-hide.exe`,隐藏为后台程序执行 - 以上两者都会启动 `cow.exe` PAC url 为 `http:///pac`,也可将浏览器的 HTTP/HTTPS 代理设置为 `listen address` 使所有网站都通过 COW 访问。 **使用 PAC 可获得更好的性能,但若 PAC 中某网站从直连变成被封,浏览器会依然尝试直连。遇到这种情况可以暂时不使用 PAC 而总是走 HTTP 代理,让 COW 学习到新的被封网站。** 命令行选项可以覆盖部分配置文件中的选项、打开 debug/request/reply 日志,执行 `cow -h` 来获取更多信息。 ## 手动指定被墙和直连网站 **一般情况下无需手工指定被墙和直连网站,该功能只是是为了处理特殊情况和性能优化。** 配置文件所在目录下的 `blocked` 和 `direct` 可指定被墙和直连网站(`direct` 中的 host 会添加到 PAC)。 Windows 下文件名为 `blocked.txt` 和 `direct.txt`。 - 每行一个域名或者主机名(COW 会先检查主机名是否在列表中,再检查域名) - 二级域名如 `google.com` 相当于 `*.google.com` - `com.hk`, `edu.cn` 等二级域名下的三级域名,作为二级域名处理。如 `google.com.hk` 相当于 `*.google.com.hk` - 其他三级及以上域名/主机名做精确匹配,例如 `plus.google.com` # 技术细节 ## 访问网站记录 COW 在配置文件所在目录下的 `stat` json 文件中记录经常访问网站被墙和直连访问的次数。 - **对未知网站,先尝试直接连接,失败后使用二级代理重试请求,2 分钟后再尝试直接** - 内置[常见被墙网站](site_blocked.go),减少检测被墙所需时间(可手工添加) - 直连访问成功一定次数后相应的 host 会添加到 PAC - host 被墙一定次数后会直接用二级代理访问 - 为避免误判,会以一定概率再次尝试直连访问 - host 若一段时间没有访问会自动被删除(避免 `stat` 文件无限增长) - 内置网站列表和用户指定的网站不会出现在统计文件中 ## COW 如何检测被墙网站 COW 将以下错误认为是墙在作怪: - 服务器连接被重置 (connection reset) - 创建连接超时 - 服务器读操作超时 无论是普通的 HTTP GET 等请求还是 CONNECT 请求,失败后 COW 都会自动重试请求。(如果已经有内容发送回 client 则不会重试而是直接断开连接。) 用连接被重置来判断被墙通常来说比较可靠,超时则不可靠。COW 每隔半分钟会尝试估算合适的超时间隔,避免在网络连接差的情况下把直连网站由于超时也当成被墙。 COW 默认配置下检测到被墙后,过两分钟再次尝试直连也是为了避免误判。 如果超时自动重试给你造成了问题,请参考[样例配置](doc/sample-config/rc)高级选项中的 `readTimeout`, `dialTimeout` 选项。 ## 限制 - 不提供 cache - 不支持 HTTP pipeline(Chrome, Firefox 默认都没开启 pipeline,支持这个功能容易增加问题而好处并不明显) # 致谢 (Acknowledgements) 贡献代码: - @fzerorubigd: various bug fixes and feature implementation 
- @tevino: http parent proxy basic authentication - @xupefei: 提供 cow-hide.exe 以在 windows 上在后台执行 cow.exe - @sunteya: 改进启动和安装脚本 Bug reporter: - GitHub users: glacjay, trawor, Blaskyy, lucifer9, zellux, xream, hieixu, fantasticfears, perrywky, JayXon, graminc, WingGao, polong, dallascao, luosheng - Twitter users: 特别感谢 @shao222 多次帮助测试新版并报告了不少 bug, @xixitalk @glacjay 对 0.3 版本的 COW 提出了让它更加自动化的建议,使我重新考虑 COW 的设计目标并且改进成 0.5 版本之后的工作方式。 ================================================ FILE: auth.go ================================================ package main import ( "bytes" "encoding/base64" "errors" "fmt" "github.com/cyfdecyf/bufio" "net" "os" "strconv" "strings" "text/template" "time" ) const ( authRealm = "cow proxy" authRawBodyTmpl = ` COW Proxy

407 Proxy authentication required


Generated by COW ` )

// netAddr describes one allowed client network: the already-masked base
// address together with the mask applied to incoming client addresses.
type netAddr struct {
	ip   net.IP
	mask net.IPMask
}

// authUser holds the credentials configured for a single proxy user.
type authUser struct {
	// user name is the key to auth.user, no need to store here
	passwd string
	ha1    string // used in request digest, initialized ondemand
	port   uint16 // 0 means any port
}

// auth is the package-wide authentication state populated by initAuth.
var auth struct {
	required      bool
	user          map[string]*authUser
	allowedClient []netAddr
	authed        *TimeoutSet // cache authenticated users based on ip
	template      *template.Template
}

// initHA1 lazily computes the digest HA1 value md5(user:realm:passwd) and
// caches it on the user so it is only calculated once.
func (au *authUser) initHA1(user string) {
	if au.ha1 == "" {
		au.ha1 = md5sum(user + ":" + authRealm + ":" + au.passwd)
	}
}

// parseUserPasswd parses "username:password[:port]".
//
// It returns the user name and a new authUser on success. On any syntax
// error (missing password, more than three fields, empty user or password,
// port not in 1..65535) it returns a non-nil error and a nil authUser.
// An empty port field ("user:pass:") means "any port" (port 0).
func parseUserPasswd(userPasswd string) (user string, au *authUser, err error) {
	arr := strings.Split(userPasswd, ":")
	n := len(arr)
	if n == 1 || n > 3 {
		err = errors.New("user password: " + userPasswd +
			" syntax wrong, should be username:password[:port]")
		return
	}
	user, passwd := arr[0], arr[1]
	if user == "" || passwd == "" {
		err = errors.New("user password " + userPasswd +
			" should not contain empty user name or password")
		return "", nil, err
	}
	var port int
	if n == 3 && arr[2] != "" {
		port, err = strconv.Atoi(arr[2])
		if err != nil || port <= 0 || port > 0xffff {
			err = errors.New("user password: " + userPasswd + " invalid port")
			return "", nil, err
		}
	}
	au = &authUser{passwd, "", uint16(port)}
	return user, au, nil
}

// parseAllowedClient parses a comma separated list of "ip" or "ip/nbits"
// entries into auth.allowedClient. An entry without a mask is treated as a
// single host (/32). Syntax errors are reported through Fatal/Fatalf.
func parseAllowedClient(val string) {
	if val == "" {
		return
	}
	arr := strings.Split(val, ",")
	auth.allowedClient = make([]netAddr, len(arr))
	for i, v := range arr {
		s := strings.TrimSpace(v)
		ipAndMask := strings.Split(s, "/")
		if len(ipAndMask) > 2 {
			Fatal("allowedClient syntax error: client should be the form ip/nbitmask")
		}
		ip := net.ParseIP(ipAndMask[0])
		if ip == nil {
			Fatalf("allowedClient syntax error %s: ip address not valid\n", s)
		}
		var mask net.IPMask
		if len(ipAndMask) == 2 {
			nbit, err := strconv.Atoi(ipAndMask[1])
			if err != nil {
				Fatalf("allowedClient syntax error %s: %v\n", s, err)
			}
			if nbit > 32 {
				Fatal("allowedClient error: mask number should <= 32")
			}
			mask = NewNbitIPv4Mask(nbit)
		} else {
			mask = NewNbitIPv4Mask(32)
		}
		// Store the network address (ip & mask) so authIP can compare directly.
		auth.allowedClient[i] = netAddr{ip.Mask(mask), mask}
	}
}

// addUserPasswd parses one "username:password[:port]" entry and registers
// the user in auth.user. Empty input is ignored; malformed or duplicate
// entries are reported through Fatal.
func addUserPasswd(val string) {
	if val == "" {
		return
	}
	user, au, err := parseUserPasswd(val)
	// The error must be checked before touching au: parseUserPasswd returns a
	// nil *authUser on failure, so logging au.port first would panic with a
	// nil dereference instead of reporting the real configuration problem.
	if err != nil {
		Fatal(err)
		return // au is nil here; never fall through
	}
	debug.Println("user:", user, "port:", au.port)
	if _, ok := auth.user[user]; ok {
		Fatal("duplicate user:", user)
	}
	auth.user[user] = au
}

// loadUserPasswdFile reads a file with one "username:password[:port]" entry
// per line and registers every entry via addUserPasswd. An empty file name
// is ignored; open and read errors are reported through Fatal.
func loadUserPasswdFile(file string) {
	if file == "" {
		return
	}
	f, err := os.Open(file)
	if err != nil {
		Fatal("error opening user passwd fle:", err)
	}
	defer f.Close()

	r := bufio.NewReader(f)
	s := bufio.NewScanner(r)
	for s.Scan() {
		addUserPasswd(s.Text())
	}
	// Scan returns false on both EOF and read errors; without this check a
	// truncated read would silently drop the remaining users.
	if err := s.Err(); err != nil {
		Fatal("error reading user passwd file:", err)
	}
}

// initAuth enables authentication when any of the user/password, password
// file or allowed-client options is configured, loads the configured users
// and allowed networks, and prepares the 407 response template.
func initAuth() {
	if config.UserPasswd != "" ||
		config.UserPasswdFile != "" ||
		config.AllowedClient != "" {
		auth.required = true
	} else {
		return
	}

	auth.user = make(map[string]*authUser)

	addUserPasswd(config.UserPasswd)
	loadUserPasswdFile(config.UserPasswdFile)
	parseAllowedClient(config.AllowedClient)

	// config.AuthTimeout is already a time.Duration (its default is
	// 2 * time.Hour); multiplying it by time.Hour again overflows int64 and
	// yields a meaningless timeout, so it is passed through unchanged.
	auth.authed = NewTimeoutSet(config.AuthTimeout)

	rawTemplate := "HTTP/1.1 407 Proxy Authentication Required\r\n" +
		"Proxy-Authenticate: Digest realm=\"" + authRealm + "\", nonce=\"{{.Nonce}}\", qop=\"auth\"\r\n" +
		"Content-Type: text/html\r\n" +
		"Cache-Control: no-cache\r\n" +
		"Content-Length: " + fmt.Sprintf("%d", len(authRawBodyTmpl)) + "\r\n\r\n" + authRawBodyTmpl
	var err error
	if auth.template, err = template.New("auth").Parse(rawTemplate); err != nil {
		Fatal("internal error generating auth template:", err)
	}
}

// Return err = nil if authentication succeed. nonce would be not empty if
// authentication is needed, and should be passed back on subsequent call.
func Authenticate(conn *clientConn, r *Request) (err error) { clientIP, _, _ := net.SplitHostPort(conn.RemoteAddr().String()) if auth.authed.has(clientIP) { debug.Printf("%s has already authed\n", clientIP) return } if authIP(clientIP) { // IP is allowed return } err = authUserPasswd(conn, r) if err == nil { auth.authed.add(clientIP) } return } // authIP checks whether the client ip address matches one in allowedClient. // It uses a sequential search. func authIP(clientIP string) bool { ip := net.ParseIP(clientIP) if ip == nil { panic("authIP should always get IP address") } for _, na := range auth.allowedClient { if ip.Mask(na.mask).Equal(na.ip) { debug.Printf("client ip %s allowed\n", clientIP) return true } } return false } func genNonce() string { buf := new(bytes.Buffer) fmt.Fprintf(buf, "%x", time.Now().Unix()) return buf.String() } func calcRequestDigest(kv map[string]string, ha1, method string) string { // Refer to rfc2617 section 3.2.2.1 Request-Digest arr := []string{ ha1, kv["nonce"], kv["nc"], kv["cnonce"], "auth", md5sum(method + ":" + kv["uri"]), } return md5sum(strings.Join(arr, ":")) } func checkProxyAuthorization(conn *clientConn, r *Request) error { if debug { debug.Printf("cli(%s) authorization: %s\n", conn.RemoteAddr(), r.ProxyAuthorization) } arr := strings.SplitN(r.ProxyAuthorization, " ", 2) if len(arr) != 2 { return errors.New("auth: malformed ProxyAuthorization header: " + r.ProxyAuthorization) } authMethod := strings.ToLower(strings.TrimSpace(arr[0])) if authMethod == "digest" { return authDigest(conn, r, arr[1]) } else if authMethod == "basic" { return authBasic(conn, arr[1]) } return errors.New("auth: method " + arr[0] + " unsupported, must use digest") } func authPort(conn *clientConn, user string, au *authUser) error { if au.port == 0 { return nil } _, portStr, _ := net.SplitHostPort(conn.LocalAddr().String()) port, _ := strconv.Atoi(portStr) if uint16(port) != au.port { errl.Printf("cli(%s) auth: user %s port not match\n", 
conn.RemoteAddr(), user) return errAuthRequired } return nil } func authBasic(conn *clientConn, userPasswd string) error { b64, err := base64.StdEncoding.DecodeString(userPasswd) if err != nil { return errors.New("auth:" + err.Error()) } arr := strings.Split(string(b64), ":") if len(arr) != 2 { return errors.New("auth: malformed basic auth user:passwd") } user := arr[0] passwd := arr[1] au, ok := auth.user[user] if !ok || au.passwd != passwd { return errAuthRequired } return authPort(conn, user, au) } func authDigest(conn *clientConn, r *Request, keyVal string) error { authHeader := parseKeyValueList(keyVal) if len(authHeader) == 0 { return errors.New("auth: empty authorization list") } nonceTime, err := strconv.ParseInt(authHeader["nonce"], 16, 64) if err != nil { return fmt.Errorf("auth: nonce %v", err) } // If nonce time too early, reject. iOS will create a new connection to do // authentication. if time.Now().Sub(time.Unix(nonceTime, 0)) > time.Minute { return errAuthRequired } user := authHeader["username"] au, ok := auth.user[user] if !ok { errl.Printf("cli(%s) auth: no such user: %s\n", conn.RemoteAddr(), authHeader["username"]) return errAuthRequired } if err = authPort(conn, user, au); err != nil { return err } if authHeader["qop"] != "auth" { return errors.New("auth: qop wrong: " + authHeader["qop"]) } response, ok := authHeader["response"] if !ok { return errors.New("auth: no request-digest response") } au.initHA1(user) digest := calcRequestDigest(authHeader, au.ha1, r.Method) if response != digest { errl.Printf("cli(%s) auth: digest not match, maybe password wrong", conn.RemoteAddr()) return errAuthRequired } return nil } func authUserPasswd(conn *clientConn, r *Request) (err error) { if r.ProxyAuthorization != "" { // client has sent authorization header err = checkProxyAuthorization(conn, r) if err == nil { return } else if err != errAuthRequired { sendErrorPage(conn, statusBadReq, "Bad authorization request", err.Error()) return } // auth required to 
through the following } nonce := genNonce() data := struct { Nonce string }{ nonce, } buf := new(bytes.Buffer) if err := auth.template.Execute(buf, data); err != nil { return fmt.Errorf("error generating auth response: %v", err) } if bool(debug) && verbose { debug.Printf("authorization response:\n%s", buf.String()) } if _, err := conn.Write(buf.Bytes()); err != nil { return fmt.Errorf("send auth response error: %v", err) } return errAuthRequired } ================================================ FILE: auth_test.go ================================================ package main import ( "net" "testing" ) func TestParseUserPasswd(t *testing.T) { testData := []struct { val string user string au *authUser }{ {"foo:bar", "foo", &authUser{"bar", "", 0}}, {"foo:bar:-1", "", nil}, {"hello:world:", "hello", &authUser{"world", "", 0}}, {"hello:world:0", "", nil}, {"hello:world:1024", "hello", &authUser{"world", "", 1024}}, {"hello:world:65535", "hello", &authUser{"world", "", 65535}}, } for _, td := range testData { user, au, err := parseUserPasswd(td.val) if td.au == nil { if err == nil { t.Error(td.val, "should return error") } continue } if td.user != user { t.Error(td.val, "user should be:", td.user, "got:", user) } if td.au.passwd != au.passwd { t.Error(td.val, "passwd should be:", td.au.passwd, "got:", au.passwd) } if td.au.port != au.port { t.Error(td.val, "port should be:", td.au.port, "got:", au.port) } } } func TestCalcDigest(t *testing.T) { a1 := md5sum("cyf" + ":" + authRealm + ":" + "wlx") auth := map[string]string{ "nonce": "50ed159c3b707061418bbb14", "nc": "00000001", "cnonce": "6c46874228c087eb", "uri": "/", } const targetDigest = "bad1cb3526e4b257a62cda10f7c25aad" digest := calcRequestDigest(auth, a1, "GET") if digest != targetDigest { t.Errorf("authentication digest calculation wrong, got: %x, should be: %s\n", digest, targetDigest) } } func TestParseAllowedClient(t *testing.T) { parseAllowedClient("") // this should not cause error 
parseAllowedClient("192.168.1.1/16, 192.169.1.2")

	// First entry: the host bits are masked away, leaving the /16 network.
	na := &auth.allowedClient[0]
	if !na.ip.Equal(net.ParseIP("192.168.0.0")) {
		t.Error("ParseAllowedClient 192.168.1.1/16 ip error, got ip:", na.ip)
	}
	mask := []byte(na.mask)
	if mask[0] != 0xff || mask[1] != 0xff || mask[2] != 0 || mask[3] != 0 {
		t.Error("ParseAllowedClient 192.168.1.1/16 mask error")
	}

	// Second entry: no mask given, so it is a single host with a /32 mask.
	na = &auth.allowedClient[1]
	if !na.ip.Equal(net.ParseIP("192.169.1.2")) {
		t.Error("ParseAllowedClient 192.169.1.2 ip error")
	}
	mask = []byte(na.mask)
	if mask[0] != 0xff || mask[1] != 0xff || mask[2] != 0xff || mask[3] != 0xff {
		t.Error("ParseAllowedClient 192.169.1.2 mask error")
	}
}

// TestAuthIP checks authIP against a mix of allowed networks and single
// hosts.
func TestAuthIP(t *testing.T) {
	parseAllowedClient("192.168.0.0/16, 192.169.2.1, 10.0.0.0/8, 8.8.8.8")

	var testData = []struct {
		ip      string
		allowed bool
	}{
		{"10.1.2.3", true},
		{"192.168.1.2", true},
		{"192.169.2.1", true},
		{"192.169.2.2", false},
		{"8.8.8.8", true},
		{"1.2.3.4", false},
	}

	for _, td := range testData {
		if authIP(td.ip) != td.allowed {
			if td.allowed {
				t.Errorf("%s should be allowed\n", td.ip)
			} else {
				t.Errorf("%s should NOT be allowed\n", td.ip)
			}
		}
	}
}

================================================ FILE: config.go ================================================
package main

import (
	"errors"
	"flag"
	"fmt"
	"net"
	"os"
	"path"
	"reflect"
	"strconv"
	"strings"
	"time"

	"github.com/cyfdecyf/bufio"
)

const (
	version               = "0.9.8"
	defaultListenAddr     = "127.0.0.1:7777"
	defaultEstimateTarget = "example.com"
)

// LoadBalanceMode selects how a parent proxy is chosen when several are
// configured.
type LoadBalanceMode byte

const (
	loadBalanceBackup LoadBalanceMode = iota
	loadBalanceHash
	loadBalanceLatency
)

// allow the same tunnel ports as polipo
var defaultTunnelAllowedPort = []string{
	"22", "80", "443", // ssh, http, https
	"873",                      // rsync
	"143", "220", "585", "993", // imap, imap3, imap4-ssl, imaps
	"109", "110", "473", "995", // pop2, pop3, hybrid-pop, pop3s
	"5222", "5269", // jabber-client, jabber-server
	"2401", "3690", "9418", // cvspserver, svn, git
}

// Config holds every option that can come from the command line or the rc
// file, plus a few derived values.
type Config struct {
	RcFile      string // config file
	LogFile     string // path for log file
	AlwaysProxy bool   // whether we should always use parent proxy
	LoadBalance LoadBalanceMode // select load balance mode

	TunnelAllowedPort map[string]bool // allowed ports to create tunnel

	SshServer []string

	// authenticate client
	UserPasswd     string
	UserPasswdFile string // file that contains user:passwd:[port] pairs
	AllowedClient  string
	AuthTimeout    time.Duration

	// advanced options
	DialTimeout time.Duration
	ReadTimeout time.Duration

	Core         int
	DetectSSLErr bool

	HttpErrorCode int

	dir         string // directory containing config file
	StatFile    string // Path for stat file
	BlockedFile string // blocked sites specified by user
	DirectFile  string // direct sites specified by user

	// not configurable in config file
	PrintVer        bool
	EstimateTimeout bool   // Whether to run estimateTimeout().
	EstimateTarget  string // Timeout estimate target site.

	// not config option
	saveReqLine bool // for http and cow parent, should save request line from client
}

// config is the global configuration, filled with defaults by initConfig.
var config Config
var configNeedUpgrade bool // whether should upgrade config file

// printVersion prints the program version to standard output.
func printVersion() {
	fmt.Println("cow version", version)
}

// initConfig fills the global config with default values. The blocked,
// direct and stat file paths default to files in the directory that
// contains rcFile.
//
// NOTE(review): path.Dir/path.Join only understand "/" separators; on
// Windows an rc path written with backslashes presumably needs
// path/filepath instead — confirm before changing.
func initConfig(rcFile string) {
	config.dir = path.Dir(rcFile)
	config.BlockedFile = path.Join(config.dir, blockedFname)
	config.DirectFile = path.Join(config.dir, directFname)
	config.StatFile = path.Join(config.dir, statFname)

	config.DetectSSLErr = false
	config.AlwaysProxy = false

	config.AuthTimeout = 2 * time.Hour
	config.DialTimeout = defaultDialTimeout
	config.ReadTimeout = defaultReadTimeout

	config.TunnelAllowedPort = make(map[string]bool)
	for _, port := range defaultTunnelAllowedPort {
		config.TunnelAllowedPort[port] = true
	}

	config.EstimateTarget = defaultEstimateTarget
}

// Whether command line options specifies listen addr
var cmdHasListenAddr bool

// parseCmdLineConfig parses the command line flags into a new Config,
// resolves the rc file path, initializes the global config defaults and, if
// a listen address was given on the command line, registers it so that
// listen options in the rc file are ignored.
func parseCmdLineConfig() *Config {
	var c Config
	var listenAddr string

	flag.StringVar(&c.RcFile, "rc", "", "config file, defaults to $HOME/.cow/rc on Unix, ./rc.txt on Windows")
	// Specifying listen default value to StringVar would override config file options
	flag.StringVar(&listenAddr, "listen", "", "listen address, disables listen in config")
	flag.IntVar(&c.Core, "core", 2, "number of cores to use")
	flag.StringVar(&c.LogFile, "logFile", "", "write output to file")
	flag.BoolVar(&c.PrintVer, "version", false, "print version")
	flag.BoolVar(&c.EstimateTimeout, "estimate", true, "enable/disable estimate timeout")

	flag.Parse()

	if c.RcFile == "" {
		c.RcFile = getDefaultRcFile()
	} else {
		c.RcFile = expandTilde(c.RcFile)
	}
	if err := isFileExists(c.RcFile); err != nil {
		Fatal("fail to get config file:", err)
	}

	initConfig(c.RcFile)

	if listenAddr != "" {
		// ParseListen returns early once cmdHasListenAddr is set, so the flag
		// must only be raised after the command line address is registered.
		configParser{}.ParseListen(listenAddr)
		cmdHasListenAddr = true // must come after parse
	}
	return &c
}

// parseBool converts "true"/"false" to a bool; any other value is reported
// through Fatalf using msg as the option name.
func parseBool(v, msg string) bool {
	switch v {
	case "true":
		return true
	case "false":
		return false
	default:
		Fatalf("%s should be true or false\n", msg)
	}
	return false
}

// parseInt converts val to an int; a conversion error is reported through
// Fatalf using msg as the option name.
func parseInt(val, msg string) (i int) {
	var err error
	if i, err = strconv.Atoi(val); err != nil {
		Fatalf("%s should be an integer\n", msg)
	}
	return
}

// parseDuration converts val with time.ParseDuration; a parse error is
// reported through Fatalf prefixed with msg.
func parseDuration(val, msg string) (d time.Duration) {
	var err error
	if d, err = time.ParseDuration(val); err != nil {
		Fatalf("%s %v\n", msg, err)
	}
	return
}

// checkServerAddr reports whether addr can be split into host and port.
func checkServerAddr(addr string) error {
	_, _, err := net.SplitHostPort(addr)
	return err
}

// isUserPasswdValid reports whether val has the form "user:passwd" with
// both parts non-empty. The password may itself contain ':'.
func isUserPasswdValid(val string) bool {
	arr := strings.SplitN(val, ":", 2)
	if len(arr) != 2 || arr[0] == "" || arr[1] == "" {
		return false
	}
	return true
}

// proxyParser provides functions to parse different types of parent proxy
type proxyParser struct{}

// ProxySocks5 registers a SOCKS5 parent proxy given as host:port.
func (p proxyParser) ProxySocks5(val string) {
	if err := checkServerAddr(val); err != nil {
		Fatal("parent socks server", err)
	}

	parentProxy.add(newSocksParent(val))
}

// ProxyHttp registers an HTTP parent proxy given as [user:passwd@]host:port.
func (pp proxyParser) ProxyHttp(val string) {
	var userPasswd, server string

	arr := strings.Split(val, "@")
	if len(arr) == 1 {
		server = arr[0]
	} else if len(arr) == 2 {
		userPasswd = arr[0]
		server = arr[1]
	} else {
		Fatal("http parent proxy contains more than one @:", val)
	}

	if err :=
checkServerAddr(server); err != nil { Fatal("parent http server", err) } config.saveReqLine = true parent := newHttpParent(server) parent.initAuth(userPasswd) parentProxy.add(parent) } // Parse method:passwd@server:port func parseMethodPasswdServer(val string) (method, passwd, server string, err error) { // Use the right-most @ symbol to seperate method:passwd and server:port. idx := strings.LastIndex(val, "@") if idx == -1 { err = errors.New("requires both encrypt method and password") return } methodPasswd := val[:idx] server = val[idx+1:] if err = checkServerAddr(server); err != nil { return } // Password can have : inside, but I don't recommend this. arr := strings.SplitN(methodPasswd, ":", 2) if len(arr) != 2 { err = errors.New("method and password should be separated by :") return } method = arr[0] passwd = arr[1] return } // parse shadowsocks proxy func (pp proxyParser) ProxySs(val string) { method, passwd, server, err := parseMethodPasswdServer(val) if err != nil { Fatal("shadowsocks parent", err) } parent := newShadowsocksParent(server) parent.initCipher(method, passwd) parentProxy.add(parent) } func (pp proxyParser) ProxyCow(val string) { method, passwd, server, err := parseMethodPasswdServer(val) if err != nil { Fatal("cow parent", err) } if err := checkServerAddr(server); err != nil { Fatal("parent cow server", err) } config.saveReqLine = true parent := newCowParent(server, method, passwd) parentProxy.add(parent) } // listenParser provides functions to parse different types of listen addresses type listenParser struct{} func (lp listenParser) ListenHttp(val string) { if cmdHasListenAddr { return } arr := strings.Fields(val) if len(arr) > 2 { Fatal("too many fields in listen = http://", val) } var addr, addrInPAC string addr = arr[0] if len(arr) == 2 { addrInPAC = arr[1] } if err := checkServerAddr(addr); err != nil { Fatal("listen http server", err) } addListenProxy(newHttpProxy(addr, addrInPAC)) } func (lp listenParser) ListenCow(val string) { if 
cmdHasListenAddr { return } method, passwd, addr, err := parseMethodPasswdServer(val) if err != nil { Fatal("listen cow", err) } addListenProxy(newCowProxy(method, passwd, addr)) } // configParser provides functions to parse options in config file. type configParser struct{} func (p configParser) ParseProxy(val string) { parser := reflect.ValueOf(proxyParser{}) zeroMethod := reflect.Value{} arr := strings.Split(val, "://") if len(arr) != 2 { Fatal("proxy has no protocol specified:", val) } protocol := arr[0] methodName := "Proxy" + strings.ToUpper(protocol[0:1]) + protocol[1:] method := parser.MethodByName(methodName) if method == zeroMethod { Fatalf("no such protocol \"%s\"\n", arr[0]) } args := []reflect.Value{reflect.ValueOf(arr[1])} method.Call(args) } func (p configParser) ParseListen(val string) { if cmdHasListenAddr { return } parser := reflect.ValueOf(listenParser{}) zeroMethod := reflect.Value{} var protocol, server string arr := strings.Split(val, "://") if len(arr) == 1 { protocol = "http" server = val configNeedUpgrade = true } else { protocol = arr[0] server = arr[1] } methodName := "Listen" + strings.ToUpper(protocol[0:1]) + protocol[1:] method := parser.MethodByName(methodName) if method == zeroMethod { Fatalf("no such listen protocol \"%s\"\n", arr[0]) } args := []reflect.Value{reflect.ValueOf(server)} method.Call(args) } func (p configParser) ParseLogFile(val string) { config.LogFile = expandTilde(val) } func (p configParser) ParseAddrInPAC(val string) { configNeedUpgrade = true arr := strings.Split(val, ",") for i, s := range arr { if s == "" { continue } s = strings.TrimSpace(s) host, _, err := net.SplitHostPort(s) if err != nil { Fatal("proxy address in PAC", err) } if host == "0.0.0.0" { Fatal("can't use 0.0.0.0 as proxy address in PAC") } if hp, ok := listenProxy[i].(*httpProxy); ok { hp.addrInPAC = s } else { Fatal("can't specify address in PAC for non http proxy") } } } func (p configParser) ParseTunnelAllowedPort(val string) { arr := 
strings.Split(val, ",") for _, s := range arr { s = strings.TrimSpace(s) if _, err := strconv.Atoi(s); err != nil { Fatal("tunnel allowed ports", err) } config.TunnelAllowedPort[s] = true } } func (p configParser) ParseSocksParent(val string) { var pp proxyParser pp.ProxySocks5(val) configNeedUpgrade = true } func (p configParser) ParseSshServer(val string) { arr := strings.Split(val, ":") if len(arr) == 2 { val += ":22" } else if len(arr) == 3 { if arr[2] == "" { val += "22" } } else { Fatal("sshServer should be in the form of: user@server:local_socks_port[:server_ssh_port]") } // add created socks server p.ParseSocksParent("127.0.0.1:" + arr[1]) config.SshServer = append(config.SshServer, val) } var http struct { parent *httpParent serverCnt int passwdCnt int } func (p configParser) ParseHttpParent(val string) { if err := checkServerAddr(val); err != nil { Fatal("parent http server", err) } config.saveReqLine = true http.parent = newHttpParent(val) parentProxy.add(http.parent) http.serverCnt++ configNeedUpgrade = true } func (p configParser) ParseHttpUserPasswd(val string) { if !isUserPasswdValid(val) { Fatal("httpUserPassword syntax wrong, should be in the form of user:passwd") } if http.passwdCnt >= http.serverCnt { Fatal("must specify httpParent before corresponding httpUserPasswd") } http.parent.initAuth(val) http.passwdCnt++ } func (p configParser) ParseAlwaysProxy(val string) { config.AlwaysProxy = parseBool(val, "alwaysProxy") } func (p configParser) ParseLoadBalance(val string) { switch val { case "backup": config.LoadBalance = loadBalanceBackup case "hash": config.LoadBalance = loadBalanceHash case "latency": config.LoadBalance = loadBalanceLatency default: Fatalf("invalid loadBalance mode: %s\n", val) } } func (p configParser) ParseStatFile(val string) { config.StatFile = expandTilde(val) } func (p configParser) ParseBlockedFile(val string) { config.BlockedFile = expandTilde(val) if err := isFileExists(config.BlockedFile); err != nil { Fatal("blocked 
file:", err) } } func (p configParser) ParseDirectFile(val string) { config.DirectFile = expandTilde(val) if err := isFileExists(config.DirectFile); err != nil { Fatal("direct file:", err) } } var shadow struct { parent *shadowsocksParent passwd string method string serverCnt int passwdCnt int methodCnt int } func (p configParser) ParseShadowSocks(val string) { if shadow.serverCnt-shadow.passwdCnt > 1 { Fatal("must specify shadowPasswd for every shadowSocks server") } // create new shadowsocks parent if both server and password are given // previously if shadow.parent != nil && shadow.serverCnt == shadow.passwdCnt { if shadow.methodCnt < shadow.serverCnt { shadow.method = "" shadow.methodCnt = shadow.serverCnt } shadow.parent.initCipher(shadow.method, shadow.passwd) } if val == "" { // the final call shadow.parent = nil return } if err := checkServerAddr(val); err != nil { Fatal("shadowsocks server", err) } shadow.parent = newShadowsocksParent(val) parentProxy.add(shadow.parent) shadow.serverCnt++ configNeedUpgrade = true } func (p configParser) ParseShadowPasswd(val string) { if shadow.passwdCnt >= shadow.serverCnt { Fatal("must specify shadowSocks before corresponding shadowPasswd") } if shadow.passwdCnt+1 != shadow.serverCnt { Fatal("must specify shadowPasswd for every shadowSocks") } shadow.passwd = val shadow.passwdCnt++ } func (p configParser) ParseShadowMethod(val string) { if shadow.methodCnt >= shadow.serverCnt { Fatal("must specify shadowSocks before corresponding shadowMethod") } // shadowMethod is optional shadow.method = val shadow.methodCnt++ } func checkShadowsocks() { if shadow.serverCnt != shadow.passwdCnt { Fatal("number of shadowsocks server and password does not match") } // parse the last shadowSocks option again to initialize the last // shadowsocks server parser := configParser{} parser.ParseShadowSocks("") } // Put actual authentication related config parsing in auth.go, so config.go // doesn't need to know the details of authentication 
implementation. func (p configParser) ParseUserPasswd(val string) { config.UserPasswd = val if !isUserPasswdValid(config.UserPasswd) { Fatal("userPassword syntax wrong, should be in the form of user:passwd") } } func (p configParser) ParseUserPasswdFile(val string) { err := isFileExists(val) if err != nil { Fatal("userPasswdFile:", err) } config.UserPasswdFile = val } func (p configParser) ParseAllowedClient(val string) { config.AllowedClient = val } func (p configParser) ParseAuthTimeout(val string) { config.AuthTimeout = parseDuration(val, "authTimeout") } func (p configParser) ParseCore(val string) { config.Core = parseInt(val, "core") } func (p configParser) ParseHttpErrorCode(val string) { config.HttpErrorCode = parseInt(val, "httpErrorCode") } func (p configParser) ParseReadTimeout(val string) { config.ReadTimeout = parseDuration(val, "readTimeout") } func (p configParser) ParseDialTimeout(val string) { config.DialTimeout = parseDuration(val, "dialTimeout") } func (p configParser) ParseDetectSSLErr(val string) { config.DetectSSLErr = parseBool(val, "detectSSLErr") } func (p configParser) ParseEstimateTarget(val string) { config.EstimateTarget = val } // overrideConfig should contain options from command line to override options // in config file. 
func parseConfig(rc string, override *Config) { // fmt.Println("rcFile:", path) f, err := os.Open(expandTilde(rc)) if err != nil { Fatal("Error opening config file:", err) } IgnoreUTF8BOM(f) scanner := bufio.NewScanner(f) parser := reflect.ValueOf(configParser{}) zeroMethod := reflect.Value{} var lines []string // store lines for upgrade var n int for scanner.Scan() { lines = append(lines, scanner.Text()) n++ line := strings.TrimSpace(scanner.Text()) if line == "" || line[0] == '#' { continue } v := strings.SplitN(line, "=", 2) if len(v) != 2 { Fatal("config syntax error on line", n) } key, val := strings.TrimSpace(v[0]), strings.TrimSpace(v[1]) methodName := "Parse" + strings.ToUpper(key[0:1]) + key[1:] method := parser.MethodByName(methodName) if method == zeroMethod { Fatalf("no such option \"%s\"\n", key) } // for backward compatibility, allow empty string in shadowMethod and logFile if val == "" && key != "shadowMethod" && key != "logFile" { Fatalf("empty %s, please comment or remove unused option\n", key) } args := []reflect.Value{reflect.ValueOf(val)} method.Call(args) } if scanner.Err() != nil { Fatalf("Error reading rc file: %v\n", scanner.Err()) } f.Close() overrideConfig(&config, override) checkConfig() if configNeedUpgrade { upgradeConfig(rc, lines) } } func upgradeConfig(rc string, lines []string) { newrc := rc + ".upgrade" f, err := os.Create(newrc) if err != nil { fmt.Println("can't create upgraded config file") return } // Upgrade config. 
proxyId := 0 listenId := 0 w := bufio.NewWriter(f) for _, line := range lines { line := strings.TrimSpace(line) if line == "" || line[0] == '#' { w.WriteString(line + newLine) continue } v := strings.Split(line, "=") key := strings.TrimSpace(v[0]) switch key { case "listen": listen := listenProxy[listenId] listenId++ w.WriteString(listen.genConfig() + newLine) // comment out original w.WriteString("#" + line + newLine) case "httpParent", "shadowSocks", "socksParent": backPool, ok := parentProxy.(*backupParentPool) if !ok { panic("initial parent pool should be backup pool") } parent := backPool.parent[proxyId] proxyId++ w.WriteString(parent.genConfig() + newLine) // comment out original w.WriteString("#" + line + newLine) case "httpUserPasswd", "shadowPasswd", "shadowMethod", "addrInPAC": // just comment out w.WriteString("#" + line + newLine) case "proxy": proxyId++ w.WriteString(line + newLine) default: w.WriteString(line + newLine) } } w.Flush() f.Close() // Must close file before renaming, otherwise will fail on windows. // Rename new and old config file. if err := os.Rename(rc, rc+"0.8"); err != nil { fmt.Println("can't backup config file for upgrade:", err) return } if err := os.Rename(newrc, rc); err != nil { fmt.Println("can't rename upgraded rc to original name:", err) return } } func overrideConfig(oldconfig, override *Config) { newVal := reflect.ValueOf(override).Elem() oldVal := reflect.ValueOf(oldconfig).Elem() // typeOfT := newVal.Type() for i := 0; i < newVal.NumField(); i++ { newField := newVal.Field(i) oldField := oldVal.Field(i) // log.Printf("%d: %s %s = %v\n", i, // typeOfT.Field(i).Name, newField.Type(), newField.Interface()) switch newField.Kind() { case reflect.String: s := newField.String() if s != "" { oldField.SetString(s) } case reflect.Int: i := newField.Int() if i != 0 { oldField.SetInt(i) } } } oldconfig.EstimateTimeout = override.EstimateTimeout } // Must call checkConfig before using config. 
func checkConfig() { checkShadowsocks() // listenAddr must be handled first, as addrInPAC dependends on this. if listenProxy == nil { listenProxy = []Proxy{newHttpProxy(defaultListenAddr, "")} } } ================================================ FILE: config_test.go ================================================ package main import ( "testing" ) func TestParseListen(t *testing.T) { parser := configParser{} parser.ParseListen("http://127.0.0.1:8888") hp, ok := listenProxy[0].(*httpProxy) if !ok { t.Error("listen http proxy type wrong") } if hp.addr != "127.0.0.1:8888" { t.Error("listen http server address parse error") } parser.ParseListen("http://127.0.0.1:8888 1.2.3.4:5678") hp, ok = listenProxy[1].(*httpProxy) if hp.addrInPAC != "1.2.3.4:5678" { t.Error("listen http addrInPAC parse error") } } func TestTunnelAllowedPort(t *testing.T) { initConfig("") parser := configParser{} parser.ParseTunnelAllowedPort("1, 2, 3, 4, 5") parser.ParseTunnelAllowedPort("6") parser.ParseTunnelAllowedPort("7") parser.ParseTunnelAllowedPort("8") testData := []struct { port string allowed bool }{ {"80", true}, // default allowd ports {"443", true}, {"1", true}, {"3", true}, {"5", true}, {"7", true}, {"8080", false}, {"8388", false}, } for _, td := range testData { allowed := config.TunnelAllowedPort[td.port] if allowed != td.allowed { t.Errorf("port %s allowed %v, got %v\n", td.port, td.allowed, allowed) } } } func TestParseProxy(t *testing.T) { pool, ok := parentProxy.(*backupParentPool) if !ok { t.Fatal("parentPool by default should be backup pool") } cnt := -1 var parser configParser parser.ParseProxy("http://127.0.0.1:8080") cnt++ hp, ok := pool.parent[cnt].ParentProxy.(*httpParent) if !ok { t.Fatal("1st http proxy parsed not as httpParent") } if hp.server != "127.0.0.1:8080" { t.Error("1st http proxy server address wrong, got:", hp.server) } parser.ParseProxy("http://user:passwd@127.0.0.2:9090") cnt++ hp, ok = pool.parent[cnt].ParentProxy.(*httpParent) if !ok { t.Fatal("2nd http 
proxy parsed not as httpParent") } if hp.server != "127.0.0.2:9090" { t.Error("2nd http proxy server address wrong, got:", hp.server) } if hp.authHeader == nil { t.Error("2nd http proxy server user password not parsed") } parser.ParseProxy("socks5://127.0.0.1:1080") cnt++ sp, ok := pool.parent[cnt].ParentProxy.(*socksParent) if !ok { t.Fatal("socks proxy parsed not as socksParent") } if sp.server != "127.0.0.1:1080" { t.Error("socks server address wrong, got:", sp.server) } parser.ParseProxy("ss://aes-256-cfb:foobar!@127.0.0.1:1080") cnt++ _, ok = pool.parent[cnt].ParentProxy.(*shadowsocksParent) if !ok { t.Fatal("shadowsocks proxy parsed not as shadowsocksParent") } } ================================================ FILE: config_unix.go ================================================ // +build darwin freebsd linux netbsd openbsd package main import ( "path" ) const ( rcFname = "rc" blockedFname = "blocked" directFname = "direct" statFname = "stat" newLine = "\n" ) func getDefaultRcFile() string { return path.Join(path.Join(getUserHomeDir(), ".cow", rcFname)) } ================================================ FILE: config_windows.go ================================================ package main import ( "os" "path" ) const ( rcFname = "rc.txt" blockedFname = "blocked.txt" directFname = "direct.txt" statFname = "stat.txt" newLine = "\r\n" ) func getDefaultRcFile() string { // On windows, put the configuration file in the same directory of cow executable // This is not a reliable way to detect binary directory, but it works for double click and run return path.Join(path.Dir(os.Args[0]), rcFname) } ================================================ FILE: conn_pool.go ================================================ // Share server connections between different clients. package main import ( "sync" "time" ) // Maximum number of connections to a server. 
const maxServerConnCnt = 5 // Store each server's connections in separate channels, getting // connections for different servers can be done in parallel. type ConnPool struct { idleConn map[string]chan *serverConn muxConn chan *serverConn // connections support multiplexing sync.RWMutex } var connPool = &ConnPool{ idleConn: map[string]chan *serverConn{}, muxConn: make(chan *serverConn, maxServerConnCnt*2), } const muxConnHostPort = "@muxConn" func init() { // make sure hostPort here won't match any actual hostPort go closeStaleServerConn(connPool.muxConn, muxConnHostPort) } func getConnFromChan(ch chan *serverConn) (sv *serverConn) { for { select { case sv = <-ch: if sv.mayBeClosed() { sv.Close() continue } return sv default: return nil } } } func putConnToChan(sv *serverConn, ch chan *serverConn, chname string) { select { case ch <- sv: debug.Printf("connPool channel %s: put conn\n", chname) return default: // Simply close the connection if can't put into channel immediately. // A better solution would remove old connections from the channel and // add the new one. But's it's more complicated and this should happen // rarely. debug.Printf("connPool channel %s: full", chname) sv.Close() } } func (cp *ConnPool) Get(hostPort string, asDirect bool) (sv *serverConn) { // Get from site specific connection first. // Direct connection are all site specific, so must use site specific // first to avoid using parent proxy for direct sites. cp.RLock() ch := cp.idleConn[hostPort] cp.RUnlock() if ch != nil { sv = getConnFromChan(ch) } if sv != nil { debug.Printf("connPool %s: get conn\n", hostPort) return sv } // All mulplexing connections are for blocked sites, // so for direct sites we should stop here. if asDirect && !config.AlwaysProxy { return nil } sv = getConnFromChan(cp.muxConn) if bool(debug) && sv != nil { debug.Println("connPool mux: get conn", hostPort) } return sv } func (cp *ConnPool) Put(sv *serverConn) { // Multiplexing connections. 
switch sv.Conn.(type) { case httpConn, cowConn: putConnToChan(sv, cp.muxConn, "muxConn") return } // Site specific connections. cp.RLock() ch := cp.idleConn[sv.hostPort] cp.RUnlock() if ch == nil { debug.Printf("connPool %s: new channel\n", sv.hostPort) ch = make(chan *serverConn, maxServerConnCnt) ch <- sv cp.Lock() cp.idleConn[sv.hostPort] = ch cp.Unlock() // start a new goroutine to close stale server connections go closeStaleServerConn(ch, sv.hostPort) } else { putConnToChan(sv, ch, sv.hostPort) } } type chanInPool struct { hostPort string ch chan *serverConn } func (cp *ConnPool) CloseAll() { debug.Println("connPool: close all server connections") // Because closeServerConn may acquire connPool.Lock, we first collect all // channel, and close server connection for each one. var connCh []chanInPool cp.RLock() for hostPort, ch := range cp.idleConn { connCh = append(connCh, chanInPool{hostPort, ch}) } cp.RUnlock() for _, hc := range connCh { closeServerConn(hc.ch, hc.hostPort, true) } closeServerConn(cp.muxConn, muxConnHostPort, true) } func closeServerConn(ch chan *serverConn, hostPort string, force bool) (done bool) { // If force is true, close all idle connection even if it maybe open. lcnt := len(ch) if lcnt == 0 { // Execute the loop at least once. lcnt = 1 } for i := 0; i < lcnt; i++ { select { case sv := <-ch: if force || sv.mayBeClosed() { debug.Printf("connPool channel %s: close one conn\n", hostPort) sv.Close() } else { // Put it back and wait. debug.Printf("connPool channel %s: put back conn\n", hostPort) ch <- sv } default: if hostPort != muxConnHostPort { // No more connection in this channel, remove the channel from // the map. debug.Printf("connPool channel %s: remove\n", hostPort) connPool.Lock() delete(connPool.idleConn, hostPort) connPool.Unlock() } return true } } return false } func closeStaleServerConn(ch chan *serverConn, hostPort string) { // Tricky here. 
When removing a channel from the map, there maybe // goroutines doing Put and Get using that channel. // For Get, there's no problem because it will return immediately. // For Put, it's possible that a new connection is added to the // channel, but the channel is no longer in the map. // So after removed the channel from the map, we wait for several seconds // and then close all connections left in it. // It's possible that Put add the connection after the final wait, but // that should not happen in practice, and the worst result is just lost // some memory and open fd. for { time.Sleep(5 * time.Second) if done := closeServerConn(ch, hostPort, false); done { break } } // Final wait and then close all left connections. In practice, there // should be no other goroutines holding reference to the channel. time.Sleep(2 * time.Second) for { select { case sv := <-ch: debug.Printf("connPool channel %s: close conn after removed\n", hostPort) sv.Close() default: debug.Printf("connPool channel %s: cleanup done\n", hostPort) return } } } ================================================ FILE: conn_pool_test.go ================================================ package main import ( "testing" "time" ) func TestGetFromEmptyPool(t *testing.T) { // should not block sv := connPool.Get("foo", true) if sv != nil { t.Error("get non nil server conn from empty conn pool") } } func TestConnPool(t *testing.T) { closeOn := time.Now().Add(10 * time.Second) conns := []*serverConn{ {hostPort: "example.com:80", willCloseOn: closeOn}, {hostPort: "example.com:80", willCloseOn: closeOn}, {hostPort: "example.com:80", willCloseOn: closeOn}, {hostPort: "example.com:443", willCloseOn: closeOn}, {hostPort: "google.com:443", willCloseOn: closeOn}, {hostPort: "google.com:443", willCloseOn: closeOn}, {hostPort: "www.google.com:80", willCloseOn: closeOn}, } for _, sv := range conns { connPool.Put(sv) } testData := []struct { hostPort string found bool }{ {"example.com", false}, {"example.com:80", true}, 
{"example.com:80", true}, {"example.com:80", true}, {"example.com:80", false}, // has 3 such conn {"www.google.com:80", true}, } for _, td := range testData { sv := connPool.Get(td.hostPort, true) if td.found { if sv == nil { t.Error("should find conn for", td.hostPort) } else if sv.hostPort != td.hostPort { t.Errorf("hostPort should be: %s, got: %s\n", td.hostPort, sv.hostPort) } } else if sv != nil { t.Errorf("should NOT find conn for %s, got conn for: %s\n", td.hostPort, sv.hostPort) } } } ================================================ FILE: doc/implementation.md ================================================ # Design # ## Requst and response handling ## **Update** using the following design, it is actually difficult to correctly support HTTP pipelining. I've come up with a new design inspired by Naruil which should be much cleaner and easier to support HTTP pipelining. But as all major browsers, except Opera, does not enable HTTP pipelining by default, I don't think it's worth the effort to support HTTP pipelining now. I'll try to support it with the new design if the performance benefits of HTTP pipelining becomes significant in the future. The final design is evolved from different previous implementations. The other subsections following this one describe how its evolved. COW uses separate goroutines to read client requests and server responses. - For each client, COW will create one *request goroutine* to - accept client request (read from client connection) - create connection if no one not exist - send request to the server (write to server connection) - For each server connection, there will be an associated *response goroutine* - reading response from the web server (read from server connection) - send response back to the client (write to client connection) One client must have one request goroutine, and may have multiple response goroutine. Response goroutine is created when the server connection is created. 
This makes it possible for COW to support HTTP pipelining. (Not very sure about this.) COW does not pack multiple requests and send them in a batch, but it can send a request before the response to the previous request is received. If the client (browser) and the web server support HTTP pipelining, then COW will not in effect make them go back to waiting for the response to each request.

But this design does make COW more complicated. I must be careful to avoid concurrency problems between the request and response goroutines.

Here are things worth noting:

- The web server connection for each host is stored in a map
  - The request goroutine creates the connection and puts it into this map
  - When serving requests, this map will be used to find already created server connections
  - We should avoid writing the map in the response goroutine. So when a response goroutine finishes, it should just mark the corresponding connection as closed instead of directly removing it from the map
- Request and response goroutines may need to notify each other to stop
  - When the client connection is closed, all response goroutines should stop
  - Client connection close can be detected in both request and response goroutines (as both will try to either read or write the connection); to make things simple, I just do notification in the request goroutine

## Notification between goroutines

- Notification sender should not block
  - I use a size 1 channel for this as the notification will be sent only once
- Receiver uses polling to handle notification
  - For blocking calls, a timeout should be set to actively poll for notification

## Why parse client http request ##

Of course we need to parse the http request to know the address of the web server.

Besides, HTTP requests sent to proxy servers are a little different from those sent directly to the web servers.
So the proxy server needs to reconstruct the http request

- Normal HTTP 1.1 `GET` request has request URI like '/index.html', but when sending to proxy, it would be something like 'host.com/index.html'
- The `CONNECT` request requires special handling by the proxy (send a 200 back to the client)

## Parse http response or not? ##

The initial implementation serves client requests one by one. For each request:

1. Parse client HTTP request
2. Connect to the server and send the request, send the response back to the client

We need to know whether a response is finished so we can start to serve another request. (This is the opposite of HTTP pipelining.) That's why we need to parse the content-length header and chunked encoding.

Parsing responses allows the proxy to put server connections back into a pool, thus allowing different clients to reuse server connections.

After supporting `CONNECT`, I realized that I can use a separate goroutine to read the HTTP response from the server and pass it directly back to the client. This approach doesn't need to parse the response to know when the response ends before starting to process another request.

**Update: not parsing HTTP response does have some problems.** Refer to section "But response parsing is necessary".

This approach has several implications that need to be considered:

- The proxy doesn't know whether the web server closes the connection by setting the header "Connection: close"
  - This should not be a big problem because web servers should use persistent connections normally
  - And this header is passed directly to the client which would close its connection to the proxy (even though the proxy didn't close this connection)
  - Even if the closed connection header is passed to the client, the client can simply create a new connection to the proxy and the proxy will detect the closed client connection
- The server connection can only serve a single client connection.
Because we don't know the boundary of responses, the proxy is unable to identify different responses and send them to different clients
  - This means that multiple clients connecting to the same server have to create different server connections
  - We have to create multiple connections to the same server to reduce latency anyway, but this makes it impossible to reuse a server connection for different clients

### Why choose not to parse ###

I chose not to parse the response because:

- Associating a client with a dedicated server connection is simpler in implementation
- As a client could create multiple proxy connections to concurrently issue requests to reduce latency, the proxy can allow only a single connection to different web servers and thus a connection pool is not needed
- Not parsing the response reduces overhead
  - An additional goroutine is needed to handle the response, so it is hard to say this definitely has better performance
- If we are going to support HTTP pipelining, we may still need to handle responses in a separate goroutine

### But response parsing is necessary ###

I've got a bug in handling HTTP response 302 when not parsing the response.

When trying to visit "youku.com", it gives a "302" response with "Connection: close". The browser doesn't close the connection and still tries to get more content from the server after seeing the response.

I tried polipo and saw that it sends back the "302" response along with a "Content-Length: 0" to indicate to the client that the response has finished.

To add this kind of response editing capability to my proxy, I have to parse the HTTP response.

So the current solution is to parse the response in a separate goroutine, which doesn't require lots of code changes compared with the not parsing approach.

# About supporting auto refresh #

When blocked sites are detected because of errors like connection resets and read timeouts, we can choose to redo the HTTP request by using the parent proxy or just return an error page and let the browser refresh.
I tried to support auto refresh. But as I want support HTTP pipelining, the client request and server response read are in separate goroutine. The response reading goroutine need to send redo request to the client request goroutine and maintain a correct request handling order. The resulting code is very complex and difficult to maintain. Besides, the extra code to support auto refresh may incur performance overhead. As blocked sites will be recorded, the refresh is only needed for the first access to a blocked site. Auto refresh is just a minor case optimization. So I choose not to support auto refresh as the benefit is small. # Error printing policy # The goal is **make it easy to find the exact error location**. - Error should be printed as early as possible - If an error happens in a function which will be invoked at multiple places, print the error at the call site ================================================ FILE: doc/init.d/cow ================================================ #!/bin/bash ### BEGIN INIT INFO # Provides: cow # Required-Start: $network # Required-Stop: $network # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: COW: Climb Over the Wall http proxy # Description: Automatically detect blocked site and use parent proxy. ### END INIT INFO # Put this script under /etc/init.d/, then run "update-rc.d cow defaults". # Note: this script requires sudo in order to run COW as the specified # user. Please change the following variables in order to use this script. # COW will search for rc/direct/block/stat file under user's $HOME/.cow/ directory. BIN=/usr/local/bin/cow USER=usr GROUP=grp PID_DIR=/var/run PID_FILE=$PID_DIR/cow.pid LOG_FILE=/var/log/cow RET_VAL=0 check_running() { if [[ -r $PID_FILE ]]; then read PID <$PID_FILE if [[ -d "/proc/$PID" ]]; then return 0 else rm -f $PID_FILE return 1 fi else return 2 fi } do_status() { check_running case $? 
# do_start launches COW in the background as $USER and records its PID.
#
# Steps:
#   1. Create $PID_DIR (owned by $USER:$GROUP, mode 0770) if it is missing.
#   2. Return 0 immediately if check_running reports a live process.
#   3. Start $BIN through sudo, redirecting stdout/stderr to $LOG_FILE.
#   4. Write the background job's PID to $PID_FILE and verify it is alive.
#
# Returns 0 when cow is (already) running, 1 when the start check fails.
# Exits the script with status 1 if the PID directory cannot be prepared.
do_start() {
    if [[ ! -d $PID_DIR ]]; then
        echo "creating PID dir"
        # Group the failure handling with { ...; } so that "exit 1" only runs
        # when the command fails. Written as `cmd || echo ...; exit 1` the
        # exit is a separate statement and always executes.
        mkdir "$PID_DIR" || {
            echo "failed creating PID directory $PID_DIR"
            exit 1
        }
        chown "$USER:$GROUP" "$PID_DIR" || {
            echo "failed changing owner of PID directory $PID_DIR"
            exit 1
        }
        chmod 0770 "$PID_DIR"
    fi
    if check_running; then
        echo "cow already running with PID $PID"
        return 0
    fi
    echo "starting cow"
    # sudo will set the group to the primary group of $USER
    sudo -u "$USER" -H -- "$BIN" >"$LOG_FILE" 2>&1 &
    PID=$!
    echo "$PID" > "$PID_FILE"
    # Give the process a moment to fail fast before checking on it.
    sleep 0.3
    if ! check_running; then
        echo "start failed"
        return 1
    fi
    echo "cow running with PID $PID"
    return 0
}
cow://encrypt_method:password@1.2.3.4:5678 # # 若 1.2.3.4:5678 在国外,位于国内的 cow 配置其为二级代理后,两个 cow 之间可以 # 通过加密连接传输 http 代理流量。目前的加密采用与 shadowsocks 相同的方式。 # # 其他说明: # - 若 server_address 为 0.0.0.0,监听本机所有 IP 地址 # - 可以用如下语法指定 PAC 中返回的代理服务器地址(当使用端口映射将 http 代理提供给外网时使用) # listen = http://127.0.0.1:7777 1.2.3.4:5678 # listen = http://127.0.0.1:7777 # 日志文件路径,如不指定则输出到 stdout #logFile = # COW 默认仅对被墙网站使用二级代理 # 下面选项设置为 true 后,所有网站都通过二级代理访问 #alwaysProxy = false # 指定多个二级代理时使用的负载均衡策略,可选策略如下 # # backup: 默认策略,优先使用第一个指定的二级代理,其他仅作备份使用 # hash: 根据请求的 host name,优先使用 hash 到的某一个二级代理 # latency: 优先选择连接延迟最低的二级代理 # # 一个二级代理连接失败后会依次尝试其他二级代理 # 失败的二级代理会以一定的概率再次尝试使用,因此恢复后会重新启用 #loadBalance = backup ############################# # 指定二级代理 ############################# # 二级代理统一使用下列语法指定: # # proxy = protocol://[authinfo@]server:port # # 重复使用 proxy 多次指定多个二级代理,backup 策略将按照二级代理出现的顺序来使用 # # 目前支持的二级代理及配置举例: # # SOCKS5: # proxy = socks5://127.0.0.1:1080 # # HTTP: # proxy = http://127.0.0.1:8080 # proxy = http://user:password@127.0.0.1:8080 # # 用户认证信息为可选项 # # shadowsocks: # proxy = ss://encrypt_method:password@1.2.3.4:8388 # proxy = ss://encrypt_method-auth:password@1.2.3.4:8388 # # encrypt_method 添加 -auth 启用 One Time Auth # authinfo 中指定加密方法和密码,所有支持的加密方法如下: # aes-128-cfb, aes-192-cfb, aes-256-cfb, # bf-cfb, cast5-cfb, des-cfb, rc4-md5, # chacha20, salsa20, rc4, table # 推荐使用 aes-128-cfb # # cow: # proxy = cow://method:passwd@1.2.3.4:4321 # # authinfo 与 shadowsocks 相同 ############################# # 执行 ssh 命令创建 SOCKS5 代理 ############################# # 下面的选项可以让 COW 执行 ssh 命令创建本地 SOCKS5 代理,并在 ssh 断开后重连 # COW 会自动使用通过 ssh 命令创建的代理,无需再通过 proxy 选项指定 # 可重复指定多个 # # 注意这一功能需要系统上已有 ssh 命令,且必须使用 ssh public key authentication # # 若指定该选项,COW 将执行以下命令: # ssh -n -N -D -p # server_ssh_port 端口不指定则默认为 22 # 如果要指定其他 ssh 选项,请修改 ~/.ssh/config #sshServer = user@server:local_socks_port[:server_ssh_port] ############################# # 认证 ############################# # 指定允许的 IP 或者网段。网段仅支持 IPv4,可以指定 IPv6 地址,用逗号分隔多个项 # 使用此选项时别忘了添加 
127.0.0.1,否则本机访问也需要认证 #allowedClient = 127.0.0.1, 192.168.1.0/24, 10.0.0.0/8 # 要求客户端通过用户名密码认证 # COW 总是先验证 IP 是否在 allowedClient 中,若不在其中再通过用户名密码认证 #userPasswd = username:password # 如需指定多个用户名密码,可在下面选项指定的文件中列出,文件中每行内容如下 # username:password[:port] # port 为可选项,若指定,则该用户只能从指定端口连接 COW # 注意:如有重复用户,COW 会报错退出 #userPasswdFile = /path/to/file # 认证失效时间 # 语法:2h3m4s 表示 2 小时 3 分钟 4 秒 #authTimeout = 2h ############################# # 高级选项 ############################# # 将指定的 HTTP error code 认为是被干扰,使用二级代理重试,默认为空 #httpErrorCode = # 最多允许使用多少个 CPU 核 #core = 2 # 检测超时时间使用的网站,最好使用能快速访问的站点 #estimateTarget = example.com # 允许建立隧道连接的端口,多个端口用逗号分隔,可重复多次 # 默认总是允许下列服务的端口: ssh, http, https, rsync, imap, pop, jabber, cvs, git, svn # 如需允许其他端口,请用该选项添加 # 限制隧道连接的端口可以防止将运行 COW 的服务器上只监听本机 ip 的服务暴露给外部 #tunnelAllowedPort = 80, 443 # GFW 会使 DNS 解析超时,也可能返回错误的地址,能连接但是读不到任何内容 # 下面两个值改小一点可以加速检测网站是否被墙,但网络情况差时可能误判 # 创建连接超时(语法跟 authTimeout 相同) #dialTimeout = 5s # 从服务器读超时 #readTimeout = 5s # 基于 client 是否很快关闭连接来检测 SSL 错误,只对 Chrome 有效 # (Chrome 遇到 SSL 错误会直接关闭连接,而不是让用户选择是否继续) # 可能将可直连网站误判为被墙网站,当 GFW 进行 SSL 中间人攻击时可以考虑使用 #detectSSLErr = false # 修改 stat/blocked/direct 文件路径,如不指定,默认在配置文件所在目录下 # 执行 cow 的用户需要有对 stat 文件所在目录的写权限才能更新 stat 文件 #statFile = /stat #blockedFile = /blocked #directFile = /direct ================================================ FILE: doc/sample-config/rc-en ================================================ # Lines starting with "#" are comments. # # Listen address of the proxy server, repeat to specify multiple ones. 
# Syntax: # # listen = protocol://[optional@]server_address:server_port # # Supported protocols: # # HTTP (provides http proxy): # listen = http://127.0.0.1:7777 # # The generated PAC url in the above example is http://127.0.0.1:7777/pac # # cow (need two cow servers to use this protocol): # listen = cow://encrypt_method:password@1.2.3.4:5678 # # Suppose 1.2.3.4:5678 is outside your country and the network is not # disturbed, then COW running in your own country should configure it # as parent proxy. The two COW servers will use encrypted connection to # pass data. The encryption method used is the same as shadowsocks. # # Note: # - If server_address is 0.0.0.0, listen on all IP addresses on the system. # - The following syntax can specify the proxy address in the generated PAC. # (Use this if you are using port forwarding to provide COW to external network.) # # listen = http://127.0.0.1:7777 1.2.3.4:5678 # listen = http://127.0.0.1:7777 # Log file path, defaults to stdout #logFile = # By default, COW only uses parent proxy if the site is blocked. # If the following option is true, COW will use parent proxy for all sites. #alwaysProxy = false # With multiple parent proxies, COW can employ one of the load balancing # strategies: # # backup: default policy, use the first parent proxy in config, # the others are just backup # hash: hash to a specific parent proxy according to host name # latency: use the parent proxy with lowest connection latency # # When one parent proxy fails to connect, COW will try other parent proxies # in order. # Failed parent proxy will be tried with some probability, so they will be # used again after recovery. #loadBalance = backup ############################# # Specify parent proxy ############################# # Parent proxies are specified with a generic syntax (following RFC 3986): # # proxy = protocol://[authinfo@]server:port # # Repeat to specify multiple parent proxies. 
Backup load balancing will use # them in order if one fails to connect. # # Supported parent proxies and config example: # # SOCKS5: # proxy = socks5://127.0.0.1:1080 # # HTTP: # proxy = http://127.0.0.1:8080 # proxy = http://user:password@127.0.0.1:8080 # # authinfo is optional # # shadowsocks: # proxy = ss://encrypt_method:password@1.2.3.4:8388 # proxy = ss://encrypt_method-auth:password@1.2.3.4:8388 # # Append -auth to encrypt_method to enable One Time Auth. # authinfo specifies encryption method and password. # Here are the supported encryption methods: # # aes-128-cfb, aes-192-cfb, aes-256-cfb, # bf-cfb, cast5-cfb, des-cfb, rc4-md5, # chacha20, salsa20, rc4, table # # aes-128-cfb is recommended. # # cow: # proxy = cow://method:passwd@1.2.3.4:4321 # # authinfo is the same as shadowsocks parent proxy ############################# # Run ssh command to create SOCKS5 parent proxy ############################# # Note: shadowsocks is better, use it if you can. # The following option lets COW execute ssh command to create local # SOCKS5 proxy and automatically re-execute if ssh connection is closed. # The created SOCKS5 proxy will be used as a parent proxy. # The option can be repeated to create multiple SOCKS5 proxies. # # Note: requires ssh command and must use ssh public key authentication. # # COW will execute the following command if the option is given: # # ssh -n -N -D <local_socks_port> -p <server_ssh_port> <user@server> # # server_ssh_port defaults to 22 # Please modify ~/.ssh/config to specify other ssh options #sshServer = user@server:local_socks_port[:server_ssh_port] ############################# # Authentication ############################# # Specify allowed IP address (IPv4 and IPv6) or sub-network (only IPv4). # Don't forget to specify 127.0.0.1 with this option. #allowedClient = 127.0.0.1, 192.168.1.0/24, 10.0.0.0/8 # Require username and password authentication. COW always checks the IP in # allowedClient first, then asks for username authentication. 
#userPasswd = username:password # To specify multiple usernames and passwords, list all those in a file with # content like this: # # username:password[:port] # # port is optional, user can only connect from the specific port if specified. # COW will report error and exit if there's duplicated user. #userPasswdFile = /path/to/file # Time interval to keep authentication information. # Syntax: 2h3m4s means 2 hours 3 minutes 4 seconds #authTimeout = 2h ############################# # Advanced options ############################# # Take a specific HTTP error code as blocked and use parent proxy to retry. #httpErrorCode = # Maximum CPU core to use. #core = 2 # cow uses this site to estimate timeout, better to use a fast website. #estimateTarget = example.com # Ports allowed to create tunnel (HTTP CONNECT method), comma separated list # or repeat to append more ports. # Ports for the following service are allowed by default: # # ssh, http, https, rsync, imap, pop, jabber, cvs, git, svn # # Limiting ports for tunneling prevents exposing internal services to outside. #tunnelAllowedPort = 80, 443 # GFW may timeout DNS query, or return wrong server address which can connect # but blocks on read forever. # Decreasing the following timeout values can speed up detecting blocked sites, # but may mistake normal sites as blocked. # DNS and connection timeout (same syntax as authTimeout). #dialTimeout = 5s # Read from server timeout. #readTimeout = 5s # Detect SSL error based on client close connection speed, only effective for # Chrome. # This detection is not reliable, may mistake normal sites as blocked. # Only consider this option when GFW is mounting a man-in-the-middle attack. #detectSSLErr = false # Change the stat/blocked/direct file position, defaults to files under directory # containing rc file. # The cow user must have write access to directory containing the stat file in order # to update stat. 
#statFile = /stat #blockedFile = /blocked #directFile = /direct ================================================ FILE: error.go ================================================ package main import ( "bytes" "io" "os" "text/template" "time" ) // Do not end with "\r\n" so we can add more header later var headRawTmpl = "HTTP/1.1 {{.CodeReason}}\r\n" + "Connection: keep-alive\r\n" + "Cache-Control: no-cache\r\n" + "Pragma: no-cache\r\n" + "Content-Type: text/html\r\n" + "Content-Length: {{.Length}}\r\n" var errPageTmpl, headTmpl *template.Template func init() { hostName, err := os.Hostname() if err != nil { hostName = "unknown host" } errPageRawTmpl := ` COW Proxy

{{.H1}}

{{.Msg}}
Generated by COW ` + version + `
Host ` + hostName + `
{{.T}} ` if headTmpl, err = template.New("errorHead").Parse(headRawTmpl); err != nil { Fatal("Internal error on generating error head template") } if errPageTmpl, err = template.New("errorPage").Parse(errPageRawTmpl); err != nil { Fatalf("Internal error on generating error page template") } } func genErrorPage(h1, msg string) (string, error) { var err error data := struct { H1 string Msg string T string }{ h1, msg, time.Now().Format(time.ANSIC), } buf := new(bytes.Buffer) err = errPageTmpl.Execute(buf, data) return buf.String(), err } func sendPageGeneric(w io.Writer, codeReason, h1, msg string) { page, err := genErrorPage(h1, msg) if err != nil { errl.Println("Error generating error page:", err) return } data := struct { CodeReason string Length int }{ codeReason, len(page), } buf := new(bytes.Buffer) if err := headTmpl.Execute(buf, data); err != nil { errl.Println("Error generating error page header:", err) return } buf.WriteString("\r\n") buf.WriteString(page) w.Write(buf.Bytes()) } func sendErrorPage(w io.Writer, codeReason, h1, msg string) { sendPageGeneric(w, codeReason, "[Error] "+h1, msg) } ================================================ FILE: estimate_timeout.go ================================================ package main import ( "fmt" "io" "net" "time" ) // For once blocked site, use min dial/read timeout to make switching to // parent proxy faster. const minDialTimeout = 3 * time.Second const minReadTimeout = 4 * time.Second const defaultDialTimeout = 5 * time.Second const defaultReadTimeout = 5 * time.Second const maxTimeout = 15 * time.Second var dialTimeout = defaultDialTimeout var readTimeout = defaultReadTimeout // estimateTimeout tries to fetch a url and adjust timeout value according to // how much time is spent on connect and fetch. This avoids incorrectly // considering non-blocked sites as blocked when network connection is bad. 
func estimateTimeout(host string, payload []byte) { //debug.Println("estimating timeout") buf := connectBuf.Get() defer connectBuf.Put(buf) var est time.Duration start := time.Now() c, err := net.Dial("tcp", host+":80") if err != nil { errl.Printf("estimateTimeout: can't connect to %s: %v, network has problem?\n", host, err) goto onErr } defer c.Close() est = time.Now().Sub(start) * 5 // debug.Println("estimated dialTimeout:", est) if est > maxTimeout { est = maxTimeout } if est > config.DialTimeout { dialTimeout = est debug.Println("new dial timeout:", dialTimeout) } else if dialTimeout != config.DialTimeout { dialTimeout = config.DialTimeout debug.Println("new dial timeout:", dialTimeout) } start = time.Now() // include time spent on sending request, reading all content to make it a // little longer if _, err = c.Write(payload); err != nil { errl.Println("estimateTimeout: error sending request:", err) goto onErr } for err == nil { _, err = c.Read(buf) } if err != io.EOF { errl.Printf("estimateTimeout: error getting %s: %v, network has problem?\n", host, err) goto onErr } est = time.Now().Sub(start) * 10 // debug.Println("estimated read timeout:", est) if est > maxTimeout { est = maxTimeout } if est > time.Duration(config.ReadTimeout) { readTimeout = est debug.Println("new read timeout:", readTimeout) } else if readTimeout != config.ReadTimeout { readTimeout = config.ReadTimeout debug.Println("new read timeout:", readTimeout) } return onErr: dialTimeout += 2 * time.Second readTimeout += 2 * time.Second } func runEstimateTimeout() { const estimateReq = "GET / HTTP/1.1\r\n" + "Host: %s\r\n" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:11.0) Gecko/20100101 Firefox/11.0\r\n" + "Accept: */*\r\n" + "Accept-Language: en-us,en;q=0.5\r\n" + "Accept-Encoding: gzip, deflate\r\n" + "Connection: close\r\n\r\n" readTimeout = config.ReadTimeout dialTimeout = config.DialTimeout payload := []byte(fmt.Sprintf(estimateReq, config.EstimateTarget)) for { 
estimateTimeout(config.EstimateTarget, payload) time.Sleep(time.Minute) } } // Guess network status based on doing HTTP request to estimateSite func networkBad() bool { return (readTimeout != config.ReadTimeout) || (dialTimeout != config.DialTimeout) } ================================================ FILE: http.go ================================================ package main import ( "bytes" "errors" "fmt" "github.com/cyfdecyf/bufio" "net" "strconv" "strings" "time" ) const CRLF = "\r\n" const ( statusCodeContinue = 100 ) const ( statusBadReq = "400 Bad Request" statusForbidden = "403 Forbidden" statusExpectFailed = "417 Expectation Failed" statusRequestTimeout = "408 Request Timeout" ) var CustomHttpErr = errors.New("CustomHttpErr") type Header struct { ContLen int64 KeepAlive time.Duration ProxyAuthorization string Chunking bool Trailer bool ConnectionKeepAlive bool ExpectContinue bool Host string } type rqState byte const ( rsCreated rqState = iota rsSent // request has been sent to server rsRecvBody // response header received, receiving response body rsDone ) type Request struct { Method string URL *URL raw *bytes.Buffer // stores the raw content of request header rawByte []byte // underlying buffer for raw // request line from client starts at 0, cow generates request line that // can be sent directly to web server reqLnStart int // start of generated request line in raw headStart int // start of header in raw bodyStart int // start of body in raw Header isConnect bool partial bool // whether contains only partial request data state rqState tryCnt byte } // Assume keep-alive request by default. 
var zeroRequest = Request{Header: Header{ConnectionKeepAlive: true}} func (r *Request) reset() { b := r.rawByte raw := r.raw *r = zeroRequest // reset to zero value if raw != nil { raw.Reset() r.rawByte = b r.raw = raw } else { r.rawByte = httpBuf.Get() r.raw = bytes.NewBuffer(r.rawByte[:0]) // must use 0 length slice } } func (r *Request) String() (s string) { return fmt.Sprintf("%s %s%s", r.Method, r.URL.HostPort, r.URL.Path) } func (r *Request) Verbose() []byte { var rqbyte []byte if r.isConnect { rqbyte = r.rawBeforeBody() } else { // This includes client request line if has http parent proxy rqbyte = r.raw.Bytes() } return rqbyte } // Message body in request is signaled by the inclusion of a Content-Length // or Transfer-Encoding header. // Refer to http://stackoverflow.com/a/299696/306935 func (r *Request) hasBody() bool { return r.Chunking || r.ContLen > 0 } func (r *Request) isRetry() bool { return r.tryCnt > 1 } func (r *Request) tryOnce() { r.tryCnt++ } func (r *Request) tooManyRetry() bool { return r.tryCnt > 3 } func (r *Request) responseNotSent() bool { return r.state <= rsSent } func (r *Request) hasSent() bool { return r.state >= rsSent } func (r *Request) releaseBuf() { if r.raw != nil { httpBuf.Put(r.rawByte) r.rawByte = nil r.raw = nil } } // rawRequest returns the raw request that can be sent directly to HTTP/1.1 server. 
func (r *Request) rawRequest() []byte { return r.raw.Bytes()[r.reqLnStart:] } func (r *Request) rawBeforeBody() []byte { return r.raw.Bytes()[:r.bodyStart] } func (r *Request) rawHeaderBody() []byte { return r.raw.Bytes()[r.headStart:] } func (r *Request) rawBody() []byte { return r.raw.Bytes()[r.bodyStart:] } func (r *Request) proxyRequestLine() []byte { return r.raw.Bytes()[0:r.reqLnStart] } func (r *Request) genRequestLine() { // Generate normal HTTP request line r.raw.WriteString(r.Method + " ") if len(r.URL.Path) == 0 { r.raw.WriteString("/") } else { r.raw.WriteString(r.URL.Path) } r.raw.WriteString(" HTTP/1.1\r\n") } type Response struct { Status int Reason []byte Header raw *bytes.Buffer rawByte []byte } var zeroResponse = Response{Header: Header{ConnectionKeepAlive: true}} func (rp *Response) reset() { b := rp.rawByte raw := rp.raw *rp = zeroResponse if raw != nil { raw.Reset() rp.rawByte = b rp.raw = raw } else { rp.rawByte = httpBuf.Get() rp.raw = bytes.NewBuffer(rp.rawByte[:0]) } } func (rp *Response) releaseBuf() { if rp.raw != nil { httpBuf.Put(rp.rawByte) rp.rawByte = nil rp.raw = nil } } func (rp *Response) rawResponse() []byte { return rp.raw.Bytes() } func (rp *Response) genStatusLine() { rp.raw.Write([]byte("HTTP/1.1 ")) rp.raw.WriteString(strconv.Itoa(rp.Status)) if len(rp.Reason) != 0 { rp.raw.WriteByte(' ') rp.raw.Write(rp.Reason) } rp.raw.Write([]byte(CRLF)) return } func (rp *Response) String() string { return fmt.Sprintf("%d %s", rp.Status, rp.Reason) } func (rp *Response) Verbose() []byte { return rp.raw.Bytes() } type URL struct { HostPort string // must contain port Host string // no port Port string Domain string Path string } func (url *URL) String() string { return url.HostPort + url.Path } // Set all fields according to hostPort except Path. func (url *URL) ParseHostPort(hostPort string) { if hostPort == "" { return } host, port, err := net.SplitHostPort(hostPort) if err != nil { // Add default 80 and split again. 
If there's still error this time, // it's not because lack of port number. host = hostPort port = "80" hostPort = net.JoinHostPort(hostPort, port) } url.Host = host url.Port = port url.HostPort = hostPort url.Domain = host2Domain(host) } // net.ParseRequestURI will unescape encoded path, but the proxy doesn't need // that. Assumes the input rawurl is valid. Even if rawurl is not valid, net.Dial // will check the correctness of the host. func ParseRequestURI(rawurl string) (*URL, error) { return ParseRequestURIBytes([]byte(rawurl)) } func ParseRequestURIBytes(rawurl []byte) (*URL, error) { if rawurl[0] == '/' { return &URL{Path: string(rawurl)}, nil } var rest, scheme []byte id := bytes.Index(rawurl, []byte("://")) if id == -1 { rest = rawurl scheme = []byte("http") // default to http } else { scheme = rawurl[:id] ASCIIToLowerInplace(scheme) // it's ok to lower case scheme if !bytes.Equal(scheme, []byte("http")) && !bytes.Equal(scheme, []byte("https")) { errl.Printf("%s protocol not supported\n", scheme) return nil, errors.New("protocol not supported") } rest = rawurl[id+3:] } var hostport, host, port, path string id = bytes.IndexByte(rest, '/') if id == -1 { hostport = string(rest) } else { hostport = string(rest[:id]) path = string(rest[id:]) } // Must add port in host so it can be used as key to find the correct // server connection. // e.g. google.com:80 and google.com:443 should use different connections. 
host, port, err := net.SplitHostPort(hostport) if err != nil { // missing port host = hostport if len(scheme) == 4 { hostport = net.JoinHostPort(host, "80") port = "80" } else { hostport = net.JoinHostPort(host, "443") port = "443" } } // Fixed wechat image url bug, url like http://[::ffff:183.192.196.102]/mmsns/lVxxxxxx host = strings.TrimSuffix(strings.TrimPrefix(host, "[::ffff:"), "]") hostport = net.JoinHostPort(host, port) return &URL{hostport, host, port, host2Domain(host), path}, nil } // headers of interest to a proxy // Define them as constant and use editor's completion to avoid typos. // Note RFC2616 only says about "Connection", no "Proxy-Connection", but // Firefox and Safari send this header along with "Connection" header. // See more at http://homepage.ntlworld.com/jonathan.deboynepollard/FGA/web-proxy-connection-header.html const ( headerConnection = "connection" headerContentLength = "content-length" headerExpect = "expect" headerHost = "host" headerKeepAlive = "keep-alive" headerProxyAuthenticate = "proxy-authenticate" headerProxyAuthorization = "proxy-authorization" headerProxyConnection = "proxy-connection" headerReferer = "referer" headerTE = "te" headerTrailer = "trailer" headerTransferEncoding = "transfer-encoding" headerUpgrade = "upgrade" fullHeaderConnectionKeepAlive = "Connection: keep-alive\r\n" fullHeaderConnectionClose = "Connection: close\r\n" fullHeaderTransferEncoding = "Transfer-Encoding: chunked\r\n" ) // Using Go's method expression var headerParser = map[string]HeaderParserFunc{ headerConnection: (*Header).parseConnection, headerContentLength: (*Header).parseContentLength, headerExpect: (*Header).parseExpect, headerHost: (*Header).parseHost, headerKeepAlive: (*Header).parseKeepAlive, headerProxyAuthorization: (*Header).parseProxyAuthorization, headerProxyConnection: (*Header).parseConnection, headerTransferEncoding: (*Header).parseTransferEncoding, headerTrailer: (*Header).parseTrailer, } var hopByHopHeader = map[string]bool{ 
headerConnection: true, headerKeepAlive: true, headerProxyAuthorization: true, headerProxyConnection: true, headerTE: true, headerTrailer: true, headerTransferEncoding: true, headerUpgrade: true, } // Note: Value bytes passed to header parse function are in the buffer // associated with bufio and will be modified. It will also be stored in the // raw request buffer, so becareful when modifying the value bytes. (Change // case only when the spec says it is case insensitive.) // // If Header needs to hold raw value, make a copy. For example, // parseProxyAuthorization does this. type HeaderParserFunc func(*Header, []byte) error // Used by both "Connection" and "Proxy-Connection" header. COW always adds // connection header at the end of a request/response (in parseRequest and // parseResponse), no matter whether the original one has this header or not. // This will change the order of headers, but should be OK as RFC2616 4.2 says // header order is not significant. (Though general-header first is "good- // practice".) 
func (h *Header) parseConnection(s []byte) error { ASCIIToLowerInplace(s) h.ConnectionKeepAlive = !bytes.Contains(s, []byte("close")) return nil } func (h *Header) parseContentLength(s []byte) (err error) { h.ContLen, err = ParseIntFromBytes(s, 10) return err } func (h *Header) parseHost(s []byte) (err error) { if h.Host == "" { h.Host = string(s) } return } func (h *Header) parseKeepAlive(s []byte) (err error) { ASCIIToLowerInplace(s) id := bytes.Index(s, []byte("timeout=")) if id != -1 { id += len("timeout=") end := id for ; end < len(s) && IsDigit(s[end]); end++ { } delta, err := ParseIntFromBytes(s[id:end], 10) if err != nil { return err // possible empty bytes } h.KeepAlive = time.Second * time.Duration(delta) } return nil } func (h *Header) parseProxyAuthorization(s []byte) error { h.ProxyAuthorization = string(s) return nil } func (h *Header) parseTransferEncoding(s []byte) error { ASCIIToLowerInplace(s) // For transfer-encoding: identify, it's the same as specifying neither // content-length nor transfer-encoding. h.Chunking = bytes.Contains(s, []byte("chunked")) if !h.Chunking && !bytes.Contains(s, []byte("identity")) { return fmt.Errorf("invalid transfer encoding: %s", s) } return nil } // RFC 2616 3.6.1 states when trailers are allowed: // // a) request includes TE header // b) server is the original server // // Even though COW removes TE header, the original server can still respond // with Trailer header. // As Trailer is general header, it's possible to appear in request. But is // there any client does this? func (h *Header) parseTrailer(s []byte) error { // use errl to test if this header is common to see errl.Printf("got Trailer header: %s\n", s) if len(s) != 0 { h.Trailer = true } return nil } // For now, COW does not fully support 100-continue. It will return "417 // expectation failed" if a request contains expect header. This is one of the // strategies supported by polipo, which is easiest to implement in cow. 
// TODO If we see lots of expect 100-continue usage, provide full support. func (h *Header) parseExpect(s []byte) error { ASCIIToLowerInplace(s) errl.Printf("Expect header: %s\n", s) // put here to see if expect header is widely used h.ExpectContinue = true /* if bytes.Contains(s, []byte("100-continue")) { h.ExpectContinue = true } */ return nil } func splitHeader(s []byte) (name, val []byte, err error) { i := bytes.IndexByte(s, ':') if i < 0 { return nil, nil, fmt.Errorf("malformed header: %#v", string(s)) } // Do not lower case field value, as it maybe case sensitive return ASCIIToLower(s[:i]), TrimSpace(s[i+1:]), nil } // Learned from net.textproto. One difference is that this one keeps the // ending '\n' in the returned line. Buf if there's only CRLF in the line, // return nil for the line. func readContinuedLineSlice(r *bufio.Reader) ([]byte, error) { // feedly.com request headers contains things like: // "$Authorization.feedly: $FeedlyAuth\r\n", so we must test for only // continuation spaces. isspace := func(b byte) bool { return b == ' ' || b == '\t' } // Read the first line. line, err := r.ReadSlice('\n') if err != nil { return nil, err } // There are servers that use \n for line ending, so trim first before check ending. // For example, the 404 page for http://plan9.bell-labs.com/magic/man2html/1/2l trimmed := TrimSpace(line) if len(trimmed) == 0 { if len(line) > 2 { return nil, fmt.Errorf("malformed end of headers, len: %d, %#v", len(line), string(line)) } return nil, nil } if isspace(line[0]) { return nil, fmt.Errorf("malformed header, start with space: %#v", string(line)) } // Optimistically assume that we have started to buffer the next line // and it starts with an ASCII letter (the next header key), so we can // avoid copying that buffered data around in memory and skipping over // non-existent whitespace. 
if r.Buffered() > 0 { peek, err := r.Peek(1) if err == nil && !isspace(peek[0]) { return line, nil } } var buf []byte buf = append(buf, trimmed...) // Read continuation lines. for skipSpace(r) > 0 { line, err := r.ReadSlice('\n') if err != nil { break } buf = append(buf, ' ') buf = append(buf, TrimTrailingSpace(line)...) } buf = append(buf, '\r', '\n') return buf, nil } func skipSpace(r *bufio.Reader) int { n := 0 for { c, err := r.ReadByte() if err != nil { // Bufio will keep err until next read. break } if c != ' ' && c != '\t' { r.UnreadByte() break } n++ } return n } // Only add headers that are of interest for a proxy into request/response's header map. func (h *Header) parseHeader(reader *bufio.Reader, raw *bytes.Buffer, url *URL) (err error) { h.ContLen = -1 for { var line, name, val []byte if line, err = readContinuedLineSlice(reader); err != nil || len(line) == 0 { return } if name, val, err = splitHeader(line); err != nil { errl.Printf("split header %v\nline: %s\nraw header:\n%s\n", err, line, raw.Bytes()) return } // Wait Go to solve/provide the string<->[]byte optimization kn := string(name) if parseFunc, ok := headerParser[kn]; ok { if len(val) == 0 { continue } if err = parseFunc(h, val); err != nil { errl.Printf("parse header %v\nline: %s\nraw header:\n%s\n", err, line, raw.Bytes()) return } } if hopByHopHeader[kn] { continue } raw.Write(line) // debug.Printf("len %d %s", len(s), s) } } // Parse the request line and header, does not touch body func parseRequest(c *clientConn, r *Request) (err error) { var s []byte reader := c.bufRd c.setReadTimeout("parseRequest") // parse request line if s, err = reader.ReadSlice('\n'); err != nil { if isErrTimeout(err) { return errClientTimeout } return err } c.unsetReadTimeout("parseRequest") // debug.Printf("Request line %s", s) r.reset() if config.saveReqLine { r.raw.Write(s) r.reqLnStart = len(s) } var f [][]byte // Tolerate with multiple spaces and '\t' is achieved by FieldsN. 
if f = FieldsN(s, 3); len(f) != 3 { return fmt.Errorf("malformed request line: %#v", string(s)) } ASCIIToUpperInplace(f[0]) r.Method = string(f[0]) // Parse URI into host and path r.URL, err = ParseRequestURIBytes(f[1]) if err != nil { return } r.Header.Host = r.URL.HostPort // If Header.Host is set, parseHost will just return. if r.Method == "CONNECT" { r.isConnect = true if bool(dbgRq) && verbose && !config.saveReqLine { r.raw.Write(s) } } else { r.genRequestLine() } r.headStart = r.raw.Len() // Read request header. if err = r.parseHeader(reader, r.raw, r.URL); err != nil { errl.Printf("parse request header: %v %s\n%s", err, r, r.Verbose()) return err } if r.Chunking { r.raw.WriteString(fullHeaderTransferEncoding) } if r.ConnectionKeepAlive { r.raw.WriteString(fullHeaderConnectionKeepAlive) } else { r.raw.WriteString(fullHeaderConnectionClose) } // The spec says proxy must add Via header. polipo disables this by // default, and I don't want to let others know the user is using COW, so // don't add it. r.raw.WriteString(CRLF) r.bodyStart = r.raw.Len() return } // If an http response may have message body func (rp *Response) hasBody(method string) bool { if method == "HEAD" || rp.Status == 304 || rp.Status == 204 || rp.Status < 200 { return false } return true } // Parse response status and headers. func parseResponse(sv *serverConn, r *Request, rp *Response) (err error) { var s []byte reader := sv.bufRd if sv.maybeFake() { sv.setReadTimeout("parseResponse") } if s, err = reader.ReadSlice('\n'); err != nil { // err maybe timeout caused by explicity setting deadline, EOF, or // reset caused by GFW. debug.Printf("read response status line %v %v\n", err, r) // Server connection with error will not be used any more, so no need // to unset timeout. // For read error, return directly in order to identify whether this // is caused by GFW. 
return err } if sv.maybeFake() { sv.unsetReadTimeout("parseResponse") } // debug.Printf("Response line %s", s) // response status line parsing var f [][]byte if f = FieldsN(s, 3); len(f) < 2 { // status line are separated by SP return fmt.Errorf("malformed response status line: %#v %v", string(s), r) } status, err := ParseIntFromBytes(f[1], 10) rp.reset() rp.Status = int(status) if err != nil { return fmt.Errorf("response status not valid: %s len=%d %v", f[1], len(f[1]), err) } if len(f) == 3 { rp.Reason = f[2] } proto := f[0] if !bytes.Equal(proto[0:7], []byte("HTTP/1.")) { return fmt.Errorf("invalid response status line: %s request %v", string(f[0]), r) } if proto[7] == '1' { rp.raw.Write(s) } else if proto[7] == '0' { // Should return HTTP version as 1.1 to client since closed connection // will be converted to chunked encoding rp.genStatusLine() } else { return fmt.Errorf("response protocol not supported: %s", f[0]) } if err = rp.parseHeader(reader, rp.raw, r.URL); err != nil { errl.Printf("parse response header: %v %s\n%s", err, r, rp.Verbose()) return err } //Check for http error code from config file if config.HttpErrorCode > 0 && rp.Status == config.HttpErrorCode { debug.Println("Requested http code is raised") return CustomHttpErr } if rp.Status == statusCodeContinue && !r.ExpectContinue { // not expecting 100-continue, just ignore it and read final response errl.Println("Ignore server 100 response for", r) return parseResponse(sv, r, rp) } if rp.Chunking { rp.raw.WriteString(fullHeaderTransferEncoding) } else if rp.ContLen == -1 { // No chunk, no content length, assume close to signal end. rp.ConnectionKeepAlive = false if rp.hasBody(r.Method) { // Connection close, no content length specification. // Use chunked encoding to pass content back to client. 
debug.Println("add chunked encoding to close connection response", r, rp) rp.raw.WriteString(fullHeaderTransferEncoding) } else { debug.Println("add content-length 0 to close connection response", r, rp) rp.raw.WriteString("Content-Length: 0\r\n") } } // Whether COW should respond with keep-alive depends on client request, // not server response. if r.ConnectionKeepAlive { rp.raw.WriteString(fullHeaderConnectionKeepAlive) rp.raw.WriteString(fullKeepAliveHeader) } else { rp.raw.WriteString(fullHeaderConnectionClose) } rp.raw.WriteString(CRLF) return nil } func unquote(s string) string { return strings.Trim(s, "\"") } func parseKeyValueList(str string) map[string]string { list := strings.Split(str, ",") if len(list) == 1 && list[0] == "" { return nil } res := make(map[string]string) for _, ele := range list { kv := strings.SplitN(strings.TrimSpace(ele), "=", 2) if len(kv) != 2 { errl.Println("no equal sign in key value list element:", ele) return nil } key, val := kv[0], unquote(kv[1]) res[key] = val } return res } ================================================ FILE: http_test.go ================================================ package main import ( "bytes" "github.com/cyfdecyf/bufio" "strings" "testing" "time" ) func TestParseRequestURI(t *testing.T) { var testData = []struct { rawurl string url *URL }{ // I'm really tired of typing google.com ... 
{"http://www.g.com", &URL{"www.g.com:80", "www.g.com", "80", "g.com", ""}}, {"http://plus.g.com/", &URL{"plus.g.com:80", "plus.g.com", "80", "g.com", "/"}}, {"https://g.com:80", &URL{"g.com:80", "g.com", "80", "g.com", ""}}, {"http://mail.g.com:80/", &URL{"mail.g.com:80", "mail.g.com", "80", "g.com", "/"}}, {"http://g.com:80/ncr", &URL{"g.com:80", "g.com", "80", "g.com", "/ncr"}}, {"https://g.com/ncr/tree", &URL{"g.com:443", "g.com", "443", "g.com", "/ncr/tree"}}, {"www.g.com.hk:80/", &URL{"www.g.com.hk:80", "www.g.com.hk", "80", "g.com.hk", "/"}}, {"g.com.jp:80", &URL{"g.com.jp:80", "g.com.jp", "80", "g.com.jp", ""}}, {"g.com", &URL{"g.com:80", "g.com", "80", "g.com", ""}}, {"g.com:8000/ncr", &URL{"g.com:8000", "g.com", "8000", "g.com", "/ncr"}}, {"g.com/ncr/tree", &URL{"g.com:80", "g.com", "80", "g.com", "/ncr/tree"}}, {"simplehost", &URL{"simplehost:80", "simplehost", "80", "", ""}}, {"simplehost:8080", &URL{"simplehost:8080", "simplehost", "8080", "", ""}}, {"192.168.1.1:8080/", &URL{"192.168.1.1:8080", "192.168.1.1", "8080", "", "/"}}, {"/helloworld", &URL{"", "", "", "", "/helloworld"}}, } for _, td := range testData { url, err := ParseRequestURI(td.rawurl) if url == nil { if err == nil { t.Error("nil URL must report error") } if td.url != nil { t.Error(td.rawurl, "should not report error") } continue } if err != nil { t.Error(td.rawurl, "non nil URL should not report error") } if url.HostPort != td.url.HostPort { t.Error(td.rawurl, "parsed hostPort wrong:", td.url.HostPort, "got", url.HostPort) } if url.Host != td.url.Host { t.Error(td.rawurl, "parsed host wrong:", td.url.Host, "got", url.Host) } if url.Port != td.url.Port { t.Error(td.rawurl, "parsed port wrong:", td.url.Port, "got", url.Port) } if url.Domain != td.url.Domain { t.Error(td.rawurl, "parsed domain wrong:", td.url.Domain, "got", url.Domain) } if url.Path != td.url.Path { t.Error(td.rawurl, "parsed path wrong:", td.url.Path, "got", url.Path) } } } func TestParseHeader(t *testing.T) { var 
testData = []struct { raw string newraw string header *Header }{ {"content-length: 64\r\nConnection: keep-alive\r\n\r\n", "content-length: 64\r\n", &Header{ContLen: 64, Chunking: false, ConnectionKeepAlive: true}}, {"Connection: keep-alive\r\nKeep-Alive: timeout=10\r\nTransfer-Encoding: chunked\r\nTE: trailers\r\n\r\n", "", &Header{ContLen: -1, Chunking: true, ConnectionKeepAlive: true, KeepAlive: 10 * time.Second}}, {"Connection:\r\n keep-alive\r\nKeep-Alive: max=5,\r\n timeout=10\r\n\r\n", "", &Header{ContLen: -1, Chunking: false, ConnectionKeepAlive: true, KeepAlive: 10 * time.Second}}, {"Connection: \r\n close\r\nLong: line\r\n continued\r\n\tagain\r\n\r\n", "Long: line continued again\r\n", &Header{ContLen: -1, Chunking: false, ConnectionKeepAlive: false}}, } for _, td := range testData { var h Header var newraw bytes.Buffer h.parseHeader(bufio.NewReader(strings.NewReader(td.raw)), &newraw, nil) if h.ContLen != td.header.ContLen { t.Errorf("%q parsed content length wrong, should be %d, get %d\n", td.raw, td.header.ContLen, h.ContLen) } if h.Chunking != td.header.Chunking { t.Errorf("%q parsed chunking wrong, should be %t, get %t\n", td.raw, td.header.Chunking, h.Chunking) } if h.ConnectionKeepAlive != td.header.ConnectionKeepAlive { t.Errorf("%q parsed connection wrong, should be %v, get %v\n", td.raw, td.header.ConnectionKeepAlive, h.ConnectionKeepAlive) } if h.KeepAlive != td.header.KeepAlive { t.Errorf("%q parsed keep alive wrong, should be %v, get %v\n", td.raw, td.header.KeepAlive, h.KeepAlive) } if newraw.String() != td.newraw { t.Errorf("%q parsed raw wrong\nshould be: %q\ngot: %q\n", td.raw, td.newraw, newraw.Bytes()) } } } ================================================ FILE: install-cow.sh ================================================ #!/bin/bash version=0.9.8 arch=`uname -m` case $arch in "x86_64") arch="64" ;; "i386" | "i586" | "i486" | "i686") arch="32" ;; "armv5tel" | "armv6l" | "armv7l") features=`cat /proc/cpuinfo | grep Features` if [[ ! 
"$features" =~ "vfp" ]]; then #arm without vfp must use GOARM=5 binary #see https://github.com/golang/go/wiki/GoArm arch="-armv5tel" else arch="-$arch" fi ;; *) echo "$arch currently has no precompiled binary" ;; esac os=`uname -s` case $os in "Darwin") os="mac" ;; "Linux") os="linux" ;; *) echo "$os currently has no precompiled binary" exit 1 esac exit_on_fail() { if [ $? != 0 ]; then echo $1 exit 1 fi } while true; do # Get install directory from environment variable. if [[ -n $COW_INSTALLDIR && -d $COW_INSTALLDIR ]]; then install_dir=$COW_INSTALLDIR break fi # Get installation directory from user echo -n "Install cow binary to which directory (absolute path, defaults to current dir): " read install_dir $la_dir/$plist || \ exit_on_fail "Download startup plist file to $la_dir failed" fi # Move binary to install directory echo "Move $tmpbin to $install_dir (will run sudo if no write permission to install directory)" if [ -w $install_dir ]; then mv $tmpbin $install_dir else sudo mv $tmpbin $install_dir fi exit_on_fail "Failed to move $tmpbin to $install_dir" rmdir $tmpdir # Done echo if $is_update; then echo "Update finished." else echo "Installation finished." echo "Please edit $config_dir/rc according to your own settings." echo 'After that, execute "cow &" to start cow and run in background.' 
fi ================================================ FILE: log.go ================================================ package main // This logging trick is learnt from a post by Rob Pike // https://groups.google.com/d/msg/golang-nuts/gU7oQGoCkmg/j3nNxuS2O_sJ import ( "flag" "fmt" "io" "log" "os" "github.com/cyfdecyf/color" ) type infoLogging bool type debugLogging bool type errorLogging bool type requestLogging bool type responseLogging bool var ( info infoLogging debug debugLogging errl errorLogging dbgRq requestLogging dbgRep responseLogging logFile io.Writer // make sure logger can be called before initLog errorLog = log.New(os.Stdout, "[ERROR] ", log.LstdFlags) debugLog = log.New(os.Stdout, "[DEBUG] ", log.LstdFlags) requestLog = log.New(os.Stdout, "[>>>>>] ", log.LstdFlags) responseLog = log.New(os.Stdout, "[<<<<<] ", log.LstdFlags) verbose bool colorize bool ) func init() { flag.BoolVar((*bool)(&info), "info", true, "info log") flag.BoolVar((*bool)(&debug), "debug", false, "debug log, with this option, log goes to stdout with color") flag.BoolVar((*bool)(&errl), "err", true, "error log") flag.BoolVar((*bool)(&dbgRq), "request", false, "request log") flag.BoolVar((*bool)(&dbgRep), "reply", false, "reply log") flag.BoolVar(&verbose, "v", false, "more info in request/response logging") flag.BoolVar(&colorize, "color", false, "colorize log output") } func initLog() { logFile = os.Stdout if config.LogFile != "" { if f, err := os.OpenFile(expandTilde(config.LogFile), os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600); err != nil { fmt.Printf("Can't open log file, logging to stdout: %v\n", err) } else { logFile = f } } log.SetOutput(logFile) if colorize { color.SetDefaultColor(color.ANSI) } else { color.SetDefaultColor(color.NoColor) } errorLog = log.New(logFile, color.Red("[ERROR] "), log.LstdFlags) debugLog = log.New(logFile, color.Blue("[DEBUG] "), log.LstdFlags) requestLog = log.New(logFile, color.Green("[>>>>>] "), log.LstdFlags) responseLog = log.New(logFile, 
color.Yellow("[<<<<<] "), log.LstdFlags) } func (d infoLogging) Printf(format string, args ...interface{}) { if d { log.Printf(format, args...) } } func (d infoLogging) Println(args ...interface{}) { if d { log.Println(args...) } } func (d debugLogging) Printf(format string, args ...interface{}) { if d { debugLog.Printf(format, args...) } } func (d debugLogging) Println(args ...interface{}) { if d { debugLog.Println(args...) } } func (d errorLogging) Printf(format string, args ...interface{}) { if d { errorLog.Printf(format, args...) } } func (d errorLogging) Println(args ...interface{}) { if d { errorLog.Println(args...) } } func (d requestLogging) Printf(format string, args ...interface{}) { if d { requestLog.Printf(format, args...) } } func (d responseLogging) Printf(format string, args ...interface{}) { if d { responseLog.Printf(format, args...) } } func Fatal(args ...interface{}) { fmt.Println(args...) os.Exit(1) } func Fatalf(format string, args ...interface{}) { fmt.Printf(format, args...) 
os.Exit(1) } ================================================ FILE: main.go ================================================ package main import ( // "flag" "os" "os/exec" "runtime" // "runtime/pprof" "sync" "syscall" ) // var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") var ( quit chan struct{} relaunch bool ) // This code is from goagain func lookPath() (argv0 string, err error) { argv0, err = exec.LookPath(os.Args[0]) if nil != err { return } if _, err = os.Stat(argv0); nil != err { return } return } func main() { quit = make(chan struct{}) // Parse flags after load config to allow override options in config cmdLineConfig := parseCmdLineConfig() if cmdLineConfig.PrintVer { printVersion() os.Exit(0) } parseConfig(cmdLineConfig.RcFile, cmdLineConfig) initSelfListenAddr() initLog() initAuth() initSiteStat() initPAC() // initPAC uses siteStat, so must init after site stat initStat() initParentPool() /* if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { Fatal(err) } pprof.StartCPUProfile(f) } */ if config.Core > 0 { runtime.GOMAXPROCS(config.Core) } go sigHandler() go runSSH() if config.EstimateTimeout { go runEstimateTimeout() } else { info.Println("timeout estimation disabled") } var wg sync.WaitGroup wg.Add(len(listenProxy)) for _, proxy := range listenProxy { go proxy.Serve(&wg, quit) } wg.Wait() if relaunch { info.Println("Relunching cow...") // Need to fork me. 
argv0, err := lookPath() if nil != err { errl.Println(err) return } err = syscall.Exec(argv0, os.Args, os.Environ()) if err != nil { errl.Println(err) } } debug.Println("the main process is , exiting...") } ================================================ FILE: main_unix.go ================================================ // +build darwin freebsd linux netbsd openbsd package main import ( "os" "os/signal" "syscall" ) func sigHandler() { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR1) for sig := range sigChan { // May handle other signals in the future. info.Printf("%v caught, exit\n", sig) storeSiteStat(siteStatExit) if sig == syscall.SIGUSR1 { relaunch = true } close(quit) break } /* if *cpuprofile != "" { pprof.StopCPUProfile() } */ } ================================================ FILE: main_windows.go ================================================ package main import ( "os" "os/signal" "syscall" ) func sigHandler() { // TODO On Windows, these signals will not be triggered on closing cmd // window. How to detect this? sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) for sig := range sigChan { // May handle other signals in the future. info.Printf("%v caught, exit\n", sig) storeSiteStat(siteStatExit) // Windows has no SIGUSR1 signal, so relaunching is not supported now. /* if sig == syscall.SIGUSR1 { relaunch = true } */ close(quit) break } /* if *cpuprofile != "" { pprof.StopCPUProfile() } */ } ================================================ FILE: pac.go ================================================ package main import ( "bytes" "fmt" "net" "strings" "sync" "text/template" "time" ) var pac struct { template *template.Template topLevelDomain string directList string // Assignments and reads to directList are in different goroutines. Go // does not guarantee atomic assignment, so we should protect these racing // access. 
dLRWMutex sync.RWMutex } func getDirectList() string { pac.dLRWMutex.RLock() dl := pac.directList pac.dLRWMutex.RUnlock() return dl } func updateDirectList() { dl := strings.Join(siteStat.GetDirectList(), "\",\n\"") pac.dLRWMutex.Lock() pac.directList = dl pac.dLRWMutex.Unlock() } func init() { const pacRawTmpl = `var direct = 'DIRECT'; var httpProxy = 'PROXY {{.ProxyAddr}}; DIRECT'; var directList = [ "", "{{.DirectDomains}}" ]; var directAcc = {}; for (var i = 0; i < directList.length; i += 1) { directAcc[directList[i]] = true; } var topLevel = { {{.TopLevel}} }; // hostIsIP determines whether a host address is an IP address and whether // it is private. Currenly only handles IPv4 addresses. function hostIsIP(host) { var part = host.split('.'); if (part.length != 4) { return [false, false]; } var n; for (var i = 3; i >= 0; i--) { if (part[i].length === 0 || part[i].length > 3) { return [false, false]; } n = Number(part[i]); if (isNaN(n) || n < 0 || n > 255) { return [false, false]; } } if (part[0] == '127' || part[0] == '10' || (part[0] == '192' && part[1] == '168')) { return [true, true]; } if (part[0] == '172') { n = Number(part[1]); if (16 <= n && n <= 31) { return [true, true]; } } return [true, false]; } function host2Domain(host) { var arr, isIP, isPrivate; arr = hostIsIP(host); isIP = arr[0]; isPrivate = arr[1]; if (isPrivate) { return ""; } if (isIP) { return host; } var lastDot = host.lastIndexOf('.'); if (lastDot === -1) { return ""; // simple host name has no domain } // Find the second last dot dot2ndLast = host.lastIndexOf(".", lastDot-1); if (dot2ndLast === -1) return host; var part = host.substring(dot2ndLast+1, lastDot); if (topLevel[part]) { var dot3rdLast = host.lastIndexOf(".", dot2ndLast-1); if (dot3rdLast === -1) { return host; } return host.substring(dot3rdLast+1); } return host.substring(dot2ndLast+1); } function FindProxyForURL(url, host) { if (url.substring(0,4) == "ftp:") return direct; if (host.substring(0,7) == "::ffff:") return 
direct; if (host.indexOf(".local", host.length - 6) !== -1) { return direct; } var domain = host2Domain(host); if (host.length == domain.length) { return directAcc[host] ? direct : httpProxy; } return (directAcc[host] || directAcc[domain]) ? direct : httpProxy; } ` var err error pac.template, err = template.New("pac").Parse(pacRawTmpl) if err != nil { Fatal("Internal error on generating pac file template:", err) } var buf bytes.Buffer for k, _ := range topLevelDomain { buf.WriteString(fmt.Sprintf("\t\"%s\": true,\n", k)) } pac.topLevelDomain = buf.String()[:buf.Len()-2] // remove the final comma } // No need for content-length as we are closing connection var pacHeader = []byte("HTTP/1.1 200 OK\r\nServer: cow-proxy\r\n" + "Content-Type: application/x-ns-proxy-autoconfig\r\nConnection: close\r\n\r\n") // Different client will have different proxy URL, so generate it upon each request. func genPAC(c *clientConn) []byte { buf := new(bytes.Buffer) hproxy, ok := c.proxy.(*httpProxy) if !ok { panic("sendPAC should only be called for http proxy") } proxyAddr := hproxy.addrInPAC if proxyAddr == "" { host, _, err := net.SplitHostPort(c.LocalAddr().String()) // This is the only check to split host port on tcp addr's string // representation in COW. Keep it so we will notice if there's any // problem in the future. 
if err != nil { panic("split host port on local address error") } proxyAddr = net.JoinHostPort(host, hproxy.port) } dl := getDirectList() if dl == "" { // Empty direct domain list buf.Write(pacHeader) pacproxy := fmt.Sprintf("function FindProxyForURL(url, host) { return 'PROXY %s; DIRECT'; };", proxyAddr) buf.Write([]byte(pacproxy)) return buf.Bytes() } data := struct { ProxyAddr string DirectDomains string TopLevel string }{ proxyAddr, dl, pac.topLevelDomain, } buf.Write(pacHeader) if err := pac.template.Execute(buf, data); err != nil { errl.Println("Error generating pac file:", err) panic("Error generating pac file") } return buf.Bytes() } func initPAC() { // we can't control goroutine scheduling, make sure when // initPAC is done, direct list is updated updateDirectList() go func() { for { time.Sleep(time.Minute) updateDirectList() } }() } func sendPAC(c *clientConn) error { _, err := c.Write(genPAC(c)) if err != nil { debug.Printf("cli(%s) error sending PAC: %s", c.RemoteAddr(), err) } return err } ================================================ FILE: pac.js ================================================ var direct = 'DIRECT'; var httpProxy = 'PROXY'; var directList = [ "", // corresponds to simple host name and ip address "taobao.com", "www.baidu.com" ]; var directAcc = {}; for (var i = 0; i < directList.length; i += 1) { directAcc[directList[i]] = true; } var topLevel = { "ac": true, "co": true, "com": true, "edu": true, "gov": true, "net": true, "org": true }; // hostIsIP determines whether a host address is an IP address and whether // it is private. Currenly only handles IPv4 addresses. 
function hostIsIP(host) { var part = host.split('.'); if (part.length != 4) { return [false, false]; } var n; for (var i = 3; i >= 0; i--) { if (part[i].length === 0 || part[i].length > 3) { return [false, false]; } n = Number(part[i]); if (isNaN(n) || n < 0 || n > 255) { return [false, false]; } } if (part[0] == '127' || part[0] == '10' || (part[0] == '192' && part[1] == '168')) { return [true, true]; } if (part[0] == '172') { n = Number(part[1]); if (16 <= n && n <= 31) { return [true, true]; } } return [true, false]; } function host2Domain(host) { var arr, isIP, isPrivate; arr = hostIsIP(host); isIP = arr[0]; isPrivate = arr[1]; if (isPrivate) { return ""; } if (isIP) { return host; } var lastDot = host.lastIndexOf('.'); if (lastDot === -1) { return ""; // simple host name has no domain } // Find the second last dot dot2ndLast = host.lastIndexOf(".", lastDot-1); if (dot2ndLast === -1) return host; var part = host.substring(dot2ndLast+1, lastDot); if (topLevel[part]) { var dot3rdLast = host.lastIndexOf(".", dot2ndLast-1); if (dot3rdLast === -1) { return host; } return host.substring(dot3rdLast+1); } return host.substring(dot2ndLast+1); } function FindProxyForURL(url, host) { if (url.substring(0,4) == "ftp:") return direct; if (host.indexOf(".local", host.length - 6) !== -1) { return direct; } var domain = host2Domain(host); if (host.length == domain.length) { return directAcc[host] ? direct : httpProxy; } return (directAcc[host] || directAcc[domain]) ? 
direct : httpProxy; } // Tests var testData, td, i; testData = [ { ip: '127.0.0.1', isIP: true, isPrivate: true }, { ip: '127.2.1.1', isIP: true, isPrivate: true }, { ip: '192.168.1.1', isIP: true, isPrivate: true }, { ip: '172.16.1.1', isIP: true, isPrivate: true }, { ip: '172.20.1.1', isIP: true, isPrivate: true }, { ip: '172.31.1.1', isIP: true, isPrivate: true }, { ip: '172.15.1.1', isIP: true, isPrivate: false }, { ip: '172.32.1.1', isIP: true, isPrivate: false }, { ip: '10.16.1.1', isIP: true, isPrivate: true }, { ip: '12.3.4.5', isIP: true, isPrivate: false }, { ip: '1.2.3.4.5', isIP: false, isPrivate: false }, { ip: 'google.com', isIP: false, isPrivate: false }, { ip: 'www.google.com.hk', isIP: false, isPrivate: false } ]; for (i = 0; i < testData.length; i += 1) { td = testData[i]; arr = hostIsIP(td.ip); if (arr[0] !== td.isIP) { if (td.isIP) { console.log(td.ip + " is ip"); } else { console.log(td.ip + " is NOT ip"); } } if (arr[0] !== td.isIP) { if (td.isIP) { console.log(td.ip + " is private ip"); } else { console.log(td.ip + " is NOT private ip"); } } } testData = [ // private ip should return direct { host: '192.168.1.1', mode: direct}, { host: '10.1.1.1', mode: direct}, { host: '172.16.2.1', mode: direct}, { host: '172.20.255.255', mode: direct}, { host: '172.31.255.255', mode: direct}, { host: '192.168.2.255', mode: direct}, // simple host should return direct { host: 'localhost', mode: direct}, { host: 'simple', mode: direct}, // non private ip should return proxy { host: '172.32.2.255', mode: httpProxy}, { host: '172.15.0.255', mode: httpProxy}, { host: '12.20.2.1', mode: httpProxy}, // host in direct domain/host should return direct { host: 'taobao.com', mode: direct}, { host: 'www.taobao.com', mode: direct}, { host: 'www.baidu.com', mode: direct}, // host not in direct domain should return proxy { host: 'baidu.com', mode: httpProxy}, { host: 'foo.baidu.com', mode: httpProxy}, { host: 'google.com', mode: httpProxy}, { host: 'www.google.com', 
mode: httpProxy}, { host: 'www.google.com.hk', mode: httpProxy}, // host in local domain should return direct { host: 'test.local', mode: direct}, { host: '.local', mode: direct}, ]; for (i = 0; i < testData.length; i += 1) { td = testData[i]; if (FindProxyForURL("", td.host) !== td.mode) { if (td.mode === direct) { console.log(td.host + " should return direct"); } else { console.log(td.host + " should return proxy"); } } } ================================================ FILE: parent_proxy.go ================================================ package main import ( "encoding/base64" "encoding/binary" "errors" "fmt" ss "github.com/shadowsocks/shadowsocks-go/shadowsocks" "hash/crc32" "io" "math/rand" "net" "sort" "strconv" "sync" "time" ) // Interface that all types of parent proxies should support. type ParentProxy interface { connect(*URL) (net.Conn, error) getServer() string // for use in updating server latency genConfig() string // for upgrading config } // Interface for different proxy selection strategy. type ParentPool interface { add(ParentProxy) empty() bool // Select a proxy from the pool and connect. May try several proxies until // one that succees, return nil and error if all parent proxies fail. connect(*URL) (net.Conn, error) } // Init parentProxy to be backup pool. So config parsing have a pool to add // parent proxies. 
var parentProxy ParentPool = &backupParentPool{} func initParentPool() { backPool, ok := parentProxy.(*backupParentPool) if !ok { panic("initial parent pool should be backup pool") } if debug { printParentProxy(backPool.parent) } if len(backPool.parent) == 0 { info.Println("no parent proxy server") return } if len(backPool.parent) == 1 && config.LoadBalance != loadBalanceBackup { debug.Println("only 1 parent, no need for load balance") config.LoadBalance = loadBalanceBackup } switch config.LoadBalance { case loadBalanceHash: debug.Println("hash parent pool", len(backPool.parent)) parentProxy = &hashParentPool{*backPool} case loadBalanceLatency: debug.Println("latency parent pool", len(backPool.parent)) go updateParentProxyLatency() parentProxy = newLatencyParentPool(backPool.parent) } } func printParentProxy(parent []ParentWithFail) { debug.Println("avaiable parent proxies:") for _, pp := range parent { switch pc := pp.ParentProxy.(type) { case *shadowsocksParent: debug.Println("\tshadowsocks: ", pc.server) case *httpParent: debug.Println("\thttp parent: ", pc.server) case *socksParent: debug.Println("\tsocks parent: ", pc.server) case *cowParent: debug.Println("\tcow parent: ", pc.server) } } } type ParentWithFail struct { ParentProxy fail int } // Backup load balance strategy: // Select proxy in the order they appear in config. type backupParentPool struct { parent []ParentWithFail } func (pp *backupParentPool) empty() bool { return len(pp.parent) == 0 } func (pp *backupParentPool) add(parent ParentProxy) { pp.parent = append(pp.parent, ParentWithFail{parent, 0}) } func (pp *backupParentPool) connect(url *URL) (srvconn net.Conn, err error) { return connectInOrder(url, pp.parent, 0) } // Hash load balance strategy: // Each host will use a proxy based on a hash value. 
type hashParentPool struct { backupParentPool } func (pp *hashParentPool) connect(url *URL) (srvconn net.Conn, err error) { start := int(crc32.ChecksumIEEE([]byte(url.Host)) % uint32(len(pp.parent))) debug.Printf("hash host %s try %d parent first", url.Host, start) return connectInOrder(url, pp.parent, start) } func (parent *ParentWithFail) connect(url *URL) (srvconn net.Conn, err error) { const maxFailCnt = 30 srvconn, err = parent.ParentProxy.connect(url) if err != nil { if parent.fail < maxFailCnt && !networkBad() { parent.fail++ } return } parent.fail = 0 return } func connectInOrder(url *URL, pp []ParentWithFail, start int) (srvconn net.Conn, err error) { const baseFailCnt = 9 var skipped []int nproxy := len(pp) if nproxy == 0 { return nil, errors.New("no parent proxy") } for i := 0; i < nproxy; i++ { proxyId := (start + i) % nproxy parent := &pp[proxyId] // skip failed server, but try it with some probability if parent.fail > 0 && rand.Intn(parent.fail+baseFailCnt) != 0 { skipped = append(skipped, proxyId) continue } if srvconn, err = parent.connect(url); err == nil { return } } // last resort, try skipped one, not likely to succeed for _, skippedId := range skipped { if srvconn, err = pp[skippedId].connect(url); err == nil { return } } return nil, err } type ParentWithLatency struct { ParentProxy latency time.Duration } type latencyParentPool struct { parent []ParentWithLatency } func newLatencyParentPool(parent []ParentWithFail) *latencyParentPool { lp := &latencyParentPool{} for _, p := range parent { lp.add(p.ParentProxy) } return lp } func (pp *latencyParentPool) empty() bool { return len(pp.parent) == 0 } func (pp *latencyParentPool) add(parent ParentProxy) { pp.parent = append(pp.parent, ParentWithLatency{parent, 0}) } // Sort interface. 
func (pp *latencyParentPool) Len() int { return len(pp.parent) } func (pp *latencyParentPool) Swap(i, j int) { p := pp.parent p[i], p[j] = p[j], p[i] } func (pp *latencyParentPool) Less(i, j int) bool { p := pp.parent return p[i].latency < p[j].latency } const latencyMax = time.Hour var latencyMutex sync.RWMutex func (pp *latencyParentPool) connect(url *URL) (srvconn net.Conn, err error) { var lp []ParentWithLatency // Read slice first. latencyMutex.RLock() lp = pp.parent latencyMutex.RUnlock() var skipped []int nproxy := len(lp) if nproxy == 0 { return nil, errors.New("no parent proxy") } for i := 0; i < nproxy; i++ { parent := lp[i] if parent.latency >= latencyMax { skipped = append(skipped, i) continue } if srvconn, err = parent.connect(url); err == nil { debug.Println("lowest latency proxy", parent.getServer()) return } parent.latency = latencyMax } // last resort, try skipped one, not likely to succeed for _, skippedId := range skipped { if srvconn, err = lp[skippedId].connect(url); err == nil { return } } return nil, err } func (parent *ParentWithLatency) updateLatency(wg *sync.WaitGroup) { defer wg.Done() proxy := parent.ParentProxy server := proxy.getServer() host, port, err := net.SplitHostPort(server) if err != nil { panic("split host port parent server error" + err.Error()) } // Resolve host name first, so latency does not include resolve time. ip, err := net.LookupHost(host) if err != nil { parent.latency = latencyMax return } ipPort := net.JoinHostPort(ip[0], port) const N = 3 var total time.Duration for i := 0; i < N; i++ { now := time.Now() cn, err := net.DialTimeout("tcp", ipPort, dialTimeout) if err != nil { debug.Println("latency update dial:", err) total += time.Minute // 1 minute as penalty continue } total += time.Now().Sub(now) cn.Close() time.Sleep(5 * time.Millisecond) } parent.latency = total / N debug.Println("latency", server, parent.latency) } func (pp *latencyParentPool) updateLatency() { // Create a copy, update latency for the copy. 
var cp latencyParentPool cp.parent = append(cp.parent, pp.parent...) // cp.parent is value instead of pointer, if we use `_, p := range cp.parent`, // the value in cp.parent will not be updated. var wg sync.WaitGroup wg.Add(len(cp.parent)) for i, _ := range cp.parent { cp.parent[i].updateLatency(&wg) } wg.Wait() // Sort according to latency. sort.Stable(&cp) debug.Println("latency lowest proxy", cp.parent[0].getServer()) // Update parent slice. latencyMutex.Lock() pp.parent = cp.parent latencyMutex.Unlock() } func updateParentProxyLatency() { lp, ok := parentProxy.(*latencyParentPool) if !ok { return } for { lp.updateLatency() time.Sleep(60 * time.Second) } } // http parent proxy type httpParent struct { server string userPasswd string // for upgrade config authHeader []byte } type httpConn struct { net.Conn parent *httpParent } func (s httpConn) String() string { return "http parent proxy " + s.parent.server } func newHttpParent(server string) *httpParent { return &httpParent{server: server} } func (hp *httpParent) getServer() string { return hp.server } func (hp *httpParent) genConfig() string { if hp.userPasswd != "" { return fmt.Sprintf("proxy = http://%s@%s", hp.userPasswd, hp.server) } else { return fmt.Sprintf("proxy = http://%s", hp.server) } } func (hp *httpParent) initAuth(userPasswd string) { if userPasswd == "" { return } hp.userPasswd = userPasswd b64 := base64.StdEncoding.EncodeToString([]byte(userPasswd)) hp.authHeader = []byte(headerProxyAuthorization + ": Basic " + b64 + CRLF) } func (hp *httpParent) connect(url *URL) (net.Conn, error) { c, err := net.Dial("tcp", hp.server) if err != nil { errl.Printf("can't connect to http parent %s for %s: %v\n", hp.server, url.HostPort, err) return nil, err } debug.Printf("connected to: %s via http parent: %s\n", url.HostPort, hp.server) return httpConn{c, hp}, nil } // shadowsocks parent proxy type shadowsocksParent struct { server string method string // method and passwd are for upgrade config passwd string 
cipher *ss.Cipher } type shadowsocksConn struct { net.Conn parent *shadowsocksParent } func (s shadowsocksConn) String() string { return "shadowsocks proxy " + s.parent.server } // In order to use parent proxy in the order specified in the config file, we // insert an uninitialized proxy into parent proxy list, and initialize it // when all its config have been parsed. func newShadowsocksParent(server string) *shadowsocksParent { return &shadowsocksParent{server: server} } func (sp *shadowsocksParent) getServer() string { return sp.server } func (sp *shadowsocksParent) genConfig() string { method := sp.method if method == "" { method = "table" } return fmt.Sprintf("proxy = ss://%s:%s@%s", method, sp.passwd, sp.server) } func (sp *shadowsocksParent) initCipher(method, passwd string) { sp.method = method sp.passwd = passwd cipher, err := ss.NewCipher(method, passwd) if err != nil { Fatal("create shadowsocks cipher:", err) } sp.cipher = cipher } func (sp *shadowsocksParent) connect(url *URL) (net.Conn, error) { c, err := ss.Dial(url.HostPort, sp.server, sp.cipher.Copy()) if err != nil { errl.Printf("can't connect to shadowsocks parent %s for %s: %v\n", sp.server, url.HostPort, err) return nil, err } debug.Println("connected to:", url.HostPort, "via shadowsocks:", sp.server) return shadowsocksConn{c, sp}, nil } // cow parent proxy type cowParent struct { server string method string passwd string cipher *ss.Cipher } type cowConn struct { net.Conn parent *cowParent } func (s cowConn) String() string { return "cow proxy " + s.parent.server } func newCowParent(srv, method, passwd string) *cowParent { cipher, err := ss.NewCipher(method, passwd) if err != nil { Fatal("create cow cipher:", err) } return &cowParent{srv, method, passwd, cipher} } func (cp *cowParent) getServer() string { return cp.server } func (cp *cowParent) genConfig() string { method := cp.method if method == "" { method = "table" } return fmt.Sprintf("proxy = cow://%s:%s@%s", method, cp.passwd, cp.server) 
} func (cp *cowParent) connect(url *URL) (net.Conn, error) { c, err := net.Dial("tcp", cp.server) if err != nil { errl.Printf("can't connect to cow parent %s for %s: %v\n", cp.server, url.HostPort, err) return nil, err } debug.Printf("connected to: %s via cow parent: %s\n", url.HostPort, cp.server) ssconn := ss.NewConn(c, cp.cipher.Copy()) return cowConn{ssconn, cp}, nil } // For socks documentation, refer to rfc 1928 http://www.ietf.org/rfc/rfc1928.txt var socksError = [...]string{ 1: "General SOCKS server failure", 2: "Connection not allowed by ruleset", 3: "Network unreachable", 4: "Host unreachable", 5: "Connection refused", 6: "TTL expired", 7: "Command not supported", 8: "Address type not supported", 9: "to X'FF' unassigned", } var socksProtocolErr = errors.New("socks protocol error") var socksMsgVerMethodSelection = []byte{ 0x5, // version 5 1, // n method 0, // no authorization required } // socks5 parent proxy type socksParent struct { server string } type socksConn struct { net.Conn parent *socksParent } func (s socksConn) String() string { return "socks proxy " + s.parent.server } func newSocksParent(server string) *socksParent { return &socksParent{server} } func (sp *socksParent) getServer() string { return sp.server } func (sp *socksParent) genConfig() string { return fmt.Sprintf("proxy = socks5://%s", sp.server) } func (sp *socksParent) connect(url *URL) (net.Conn, error) { c, err := net.Dial("tcp", sp.server) if err != nil { errl.Printf("can't connect to socks parent %s for %s: %v\n", sp.server, url.HostPort, err) return nil, err } hasErr := false defer func() { if hasErr { c.Close() } }() var n int if n, err = c.Write(socksMsgVerMethodSelection); n != 3 || err != nil { errl.Printf("sending ver/method selection msg %v n = %v\n", err, n) hasErr = true return nil, err } // version/method selection repBuf := make([]byte, 2) _, err = io.ReadFull(c, repBuf) if err != nil { errl.Printf("read ver/method selection error %v\n", err) hasErr = true return nil, 
err } if repBuf[0] != 5 || repBuf[1] != 0 { errl.Printf("socks ver/method selection reply error ver %d method %d", repBuf[0], repBuf[1]) hasErr = true return nil, err } // debug.Println("Socks version selection done") // send connect request host := url.Host port, err := strconv.Atoi(url.Port) if err != nil { errl.Printf("should not happen, port error %v\n", port) hasErr = true return nil, err } hostLen := len(host) bufLen := 5 + hostLen + 2 // last 2 is port reqBuf := make([]byte, bufLen) reqBuf[0] = 5 // version 5 reqBuf[1] = 1 // cmd: connect // reqBuf[2] = 0 // rsv: set to 0 when initializing reqBuf[3] = 3 // atyp: domain name reqBuf[4] = byte(hostLen) copy(reqBuf[5:], host) binary.BigEndian.PutUint16(reqBuf[5+hostLen:5+hostLen+2], uint16(port)) if n, err = c.Write(reqBuf); err != nil || n != bufLen { errl.Printf("send socks request err %v n %d\n", err, n) hasErr = true return nil, err } // I'm not clear why the buffer is fixed at 10. The rfc document does not say this. // Polipo set this to 10 and I also observed the reply is always 10. replyBuf := make([]byte, 10) if n, err = c.Read(replyBuf); err != nil { // Seems that socks server will close connection if it can't find host if err != io.EOF { errl.Printf("read socks reply err %v n %d\n", err, n) } hasErr = true return nil, errors.New("connection failed (by socks server " + sp.server + "). 
No such host?") } // debug.Printf("Socks reply length %d\n", n) if replyBuf[0] != 5 { errl.Printf("socks reply connect %s VER %d not supported\n", url.HostPort, replyBuf[0]) hasErr = true return nil, socksProtocolErr } if replyBuf[1] != 0 { errl.Printf("socks reply connect %s error %s\n", url.HostPort, socksError[replyBuf[1]]) hasErr = true return nil, socksProtocolErr } if replyBuf[3] != 1 { errl.Printf("socks reply connect %s ATYP %d\n", url.HostPort, replyBuf[3]) hasErr = true return nil, socksProtocolErr } debug.Println("connected to:", url.HostPort, "via socks server:", sp.server) // Now the socket can be used to pass data. return socksConn{c, sp}, nil } ================================================ FILE: proxy.go ================================================ package main import ( "bytes" "errors" "fmt" "io" "net" "strings" "sync" "time" "github.com/cyfdecyf/bufio" "github.com/cyfdecyf/leakybuf" ss "github.com/shadowsocks/shadowsocks-go/shadowsocks" ) // As I'm using ReadSlice to read line, it's possible to get // bufio.ErrBufferFull while reading line, so set it to a large value to // prevent such problems. // // For limits about URL and HTTP header size, refer to: // http://stackoverflow.com/questions/417142/what-is-the-maximum-length-of-a-url // "de facto limit of 2000 characters" // http://www.mnot.net/blog/2011/07/11/what_proxies_must_do // "URIs should be allowed at least 8000 octets, and HTTP headers should have // 4000 as an absolute minimum". // In practice, there are sites using cookies larger than 4096 bytes, // e.g. www.fitbit.com. So set http buffer size to 8192 to be safe. const httpBufSize = 8192 // Hold at most 4MB memory as buffer for parsing http request/response and // holding post data. var httpBuf = leakybuf.NewLeakyBuf(512, httpBufSize) // If no keep-alive header in response, use this as the keep-alive value. const defaultServerConnTimeout = 15 * time.Second // Close client connection if no new requests received in some time. 
// (On OS X, the default soft limit of open file descriptor is 256, which is // very conservative and easy to cause problem if we are not careful to limit // open fds.) const clientConnTimeout = 15 * time.Second const fullKeepAliveHeader = "Keep-Alive: timeout=15\r\n" // If client closed connection for HTTP CONNECT method in less then 1 second, // consider it as an ssl error. This is only effective for Chrome which will // drop connection immediately upon SSL error. const sslLeastDuration = time.Second // Some code are learnt from the http package // encapulate actual error for an retry error type RetryError struct { error } func isErrRetry(err error) bool { if err == nil { return false } _, ok := err.(RetryError) return ok } var zeroTime time.Time type directConn struct { net.Conn } func (dc directConn) String() string { return "direct connection" } type serverConnState byte const ( svConnected serverConnState = iota svSendRecvResponse svStopped ) type serverConn struct { net.Conn bufRd *bufio.Reader buf []byte // buffer for the buffered reader hostPort string state serverConnState willCloseOn time.Time siteInfo *VisitCnt visited bool } type clientConn struct { net.Conn // connection to the proxy client bufRd *bufio.Reader buf []byte // buffer for the buffered reader proxy Proxy } var ( errPageSent = errors.New("error page has sent") errClientTimeout = errors.New("read client request timeout") errAuthRequired = errors.New("authentication requried") ) type Proxy interface { Serve(*sync.WaitGroup, <-chan struct{}) Addr() string genConfig() string // for upgrading config } var listenProxy []Proxy func addListenProxy(p Proxy) { listenProxy = append(listenProxy, p) } type httpProxy struct { addr string // listen address, contains port port string // for use when generating PAC addrInPAC string // proxy server address to use in PAC } func newHttpProxy(addr, addrInPAC string) *httpProxy { _, port, err := net.SplitHostPort(addr) if err != nil { panic("proxy addr" + 
err.Error()) } return &httpProxy{addr, port, addrInPAC} } func (proxy *httpProxy) genConfig() string { if proxy.addrInPAC != "" { return fmt.Sprintf("listen = http://%s %s", proxy.addr, proxy.addrInPAC) } else { return fmt.Sprintf("listen = http://%s", proxy.addr) } } func (proxy *httpProxy) Addr() string { return proxy.addr } func (hp *httpProxy) Serve(wg *sync.WaitGroup, quit <-chan struct{}) { defer func() { wg.Done() }() ln, err := net.Listen("tcp", hp.addr) if err != nil { fmt.Println("listen http failed:", err) return } var exit bool go func() { <-quit exit = true ln.Close() }() host, _, _ := net.SplitHostPort(hp.addr) var pacURL string if host == "" || host == "0.0.0.0" { pacURL = fmt.Sprintf("http://:%s/pac", hp.port) } else if hp.addrInPAC == "" { pacURL = fmt.Sprintf("http://%s/pac", hp.addr) } else { pacURL = fmt.Sprintf("http://%s/pac", hp.addrInPAC) } info.Printf("COW %s listen http %s, PAC url %s\n", version, hp.addr, pacURL) for { conn, err := ln.Accept() if err != nil && !exit { errl.Printf("http proxy(%s) accept %v\n", ln.Addr(), err) if isErrTooManyOpenFd(err) { connPool.CloseAll() } time.Sleep(time.Millisecond) continue } if exit { debug.Println("exiting the http listner") break } c := newClientConn(conn, hp) go c.serve() } } type cowProxy struct { addr string method string passwd string cipher *ss.Cipher } func newCowProxy(method, passwd, addr string) *cowProxy { cipher, err := ss.NewCipher(method, passwd) if err != nil { Fatal("can't initialize cow proxy server", err) } return &cowProxy{addr, method, passwd, cipher} } func (cp *cowProxy) genConfig() string { method := cp.method if method == "" { method = "table" } return fmt.Sprintf("listen = cow://%s:%s@%s", method, cp.passwd, cp.addr) } func (cp *cowProxy) Addr() string { return cp.addr } func (cp *cowProxy) Serve(wg *sync.WaitGroup, quit <-chan struct{}) { defer func() { wg.Done() }() ln, err := net.Listen("tcp", cp.addr) if err != nil { fmt.Println("listen cow failed:", err) return } 
info.Printf("COW %s cow proxy address %s\n", version, cp.addr) var exit bool go func() { <-quit exit = true ln.Close() }() for { conn, err := ln.Accept() if err != nil && !exit { errl.Printf("cow proxy(%s) accept %v\n", ln.Addr(), err) if isErrTooManyOpenFd(err) { connPool.CloseAll() } time.Sleep(time.Millisecond) continue } if exit { debug.Println("exiting cow listner") break } ssConn := ss.NewConn(conn, cp.cipher.Copy()) c := newClientConn(ssConn, cp) go c.serve() } } func newClientConn(cli net.Conn, proxy Proxy) *clientConn { buf := httpBuf.Get() c := &clientConn{ Conn: cli, buf: buf, bufRd: bufio.NewReaderFromBuf(cli, buf), proxy: proxy, } if debug { debug.Printf("cli(%s) connected, total %d clients\n", cli.RemoteAddr(), incCliCnt()) } return c } func (c *clientConn) releaseBuf() { if c.bufRd != nil { // debug.Println("release client buffer") httpBuf.Put(c.buf) c.buf = nil c.bufRd = nil } } func (c *clientConn) Close() { c.releaseBuf() if debug { debug.Printf("cli(%s) closed, total %d clients\n", c.RemoteAddr(), decCliCnt()) } c.Conn.Close() } func (c *clientConn) setReadTimeout(msg string) { // Always keep connections alive for cow conn from client for more reuse. // For other client connections, set read timeout so we can close the // connection after a period of idle to reduce number of open connections. if _, ok := c.Conn.(*ss.Conn); !ok { // make actual timeout a little longer than keep-alive value sent to client setConnReadTimeout(c.Conn, clientConnTimeout+2*time.Second, msg) } } func (c *clientConn) unsetReadTimeout(msg string) { if _, ok := c.Conn.(*ss.Conn); !ok { unsetConnReadTimeout(c.Conn, msg) } } // Listen address as key, not including port part. var selfListenAddr map[string]bool // Called in main, so no need to protect concurrent initialization. func initSelfListenAddr() { selfListenAddr = make(map[string]bool) // Add empty host to self listen addr, in case there's no Host header. 
selfListenAddr[""] = true for _, proxy := range listenProxy { addr := proxy.Addr() // Handle wildcard address. if addr[0] == ':' || strings.HasPrefix(addr, "0.0.0.0") { for _, ad := range hostAddr() { selfListenAddr[ad] = true } selfListenAddr["localhost"] = true continue } host, _, err := net.SplitHostPort(addr) if err != nil { panic("listen addr invalid: " + addr) } selfListenAddr[host] = true if host == "127.0.0.1" { selfListenAddr["localhost"] = true } else if host == "localhost" { selfListenAddr["127.0.0.1"] = true } } } func isSelfRequest(r *Request) bool { if r.URL.HostPort != "" { return false } // Maxthon sometimes sends requests without host in request line, // in that case, get host information from Host header. // But if client PAC setting is using cow server's DNS name, we can't // decide if the request is for cow itself (need reverse lookup). // So if request path seems like getting PAC, simply return true. if r.URL.Path == "/pac" || strings.HasPrefix(r.URL.Path, "/pac?") { return true } r.URL.ParseHostPort(r.Header.Host) if selfListenAddr[r.URL.Host] { return true } debug.Printf("fixed request with no host in request line %s\n", r) return false } func (c *clientConn) serveSelfURL(r *Request) (err error) { if _, ok := c.proxy.(*httpProxy); !ok { goto end } if r.Method != "GET" { goto end } if r.URL.Path == "/pac" || strings.HasPrefix(r.URL.Path, "/pac?") { sendPAC(c) // PAC header contains connection close, send non nil error to close // client connection. 
return errPageSent } end: sendErrorPage(c, "404 not found", "Page not found", genErrMsg(r, nil, "Serving request to COW proxy.")) errl.Printf("cli(%s) page not found, serving request to cow %s\n%s", c.RemoteAddr(), r, r.Verbose()) return errPageSent } func (c *clientConn) shouldRetry(r *Request, sv *serverConn, re error) bool { if !isErrRetry(re) { return false } err, _ := re.(RetryError) if !r.responseNotSent() { if debug { debug.Printf("cli(%s) has sent some response, can't retry %v\n", c.RemoteAddr(), r) } return false } if r.partial { if debug { debug.Printf("cli(%s) partial request, can't retry %v\n", c.RemoteAddr(), r) } sendErrorPage(c, "502 partial request", err.Error(), genErrMsg(r, sv, "Request is too large to hold in buffer, can't retry. "+ "Refresh to retry may work.")) return false } else if r.raw == nil { msg := "Please report issue to the developer: Non partial request with buffer released" errl.Println(msg, r) panic(msg) } if r.tooManyRetry() { if sv.maybeFake() { // Sometimes GFW reset will got EOF error leading to retry too many times. // In that case, consider the url as temp blocked and try parent proxy. 
siteStat.TempBlocked(r.URL) r.tryCnt = 0 return true } debug.Printf("cli(%s) can't retry %v tryCnt=%d\n", c.RemoteAddr(), r, r.tryCnt) sendErrorPage(c, "502 retry failed", "Can't finish HTTP request", genErrMsg(r, sv, "Has tried several times.")) return false } return true } func dbgPrintRq(c *clientConn, r *Request) { if r.Trailer { errl.Printf("cli(%s) request %s has Trailer header\n%s", c.RemoteAddr(), r, r.Verbose()) } if dbgRq { if verbose { dbgRq.Printf("cli(%s) request %s\n%s", c.RemoteAddr(), r, r.Verbose()) } else { dbgRq.Printf("cli(%s) request %s\n", c.RemoteAddr(), r) } } } type SinkWriter struct{} func (s SinkWriter) Write(p []byte) (int, error) { return len(p), nil } func (c *clientConn) serve() { var r Request var rp Response var sv *serverConn var err error var authed bool // For cow proxy server, authentication is done by matching password. if _, ok := c.proxy.(*cowProxy); ok { authed = true } defer func() { r.releaseBuf() c.Close() }() // Refer to implementation.md for the design choices on parsing the request // and response. for { if c.bufRd == nil || c.buf == nil { panic("client read buffer nil") } if err = parseRequest(c, &r); err != nil { debug.Printf("cli(%s) parse request %v\n", c.RemoteAddr(), err) if err == io.EOF || isErrConnReset(err) { return } if err != errClientTimeout { sendErrorPage(c, "404 Bad request", "Bad request", err.Error()) return } sendErrorPage(c, statusRequestTimeout, statusRequestTimeout, "Your browser didn't send a complete request in time.") return } dbgPrintRq(c, &r) // PAC may leak frequently visited sites information. But if cow // requires authentication for PAC, some clients may not be able // handle it. (e.g. Proxy SwitchySharp extension on Chrome.) if isSelfRequest(&r) { if err = c.serveSelfURL(&r); err != nil { return } continue } if auth.required && !authed { if err = Authenticate(c, &r); err != nil { errl.Printf("cli(%s) %v\n", c.RemoteAddr(), err) // Request may have body. 
To make things simple, close // connection so we don't need to skip request body before // reading the next request. return } authed = true } if r.isConnect && !config.TunnelAllowedPort[r.URL.Port] { sendErrorPage(c, statusForbidden, "Forbidden tunnel port", genErrMsg(&r, nil, "Please contact proxy admin.")) return } if r.ExpectContinue { sendErrorPage(c, statusExpectFailed, "Expect header not supported", "Please contact COW's developer if you see this.") // Client may have sent request body at this point. Simply close // connection so we don't need to handle this case. // NOTE: sendErrorPage tells client the connection will keep alive, but // actually it will close here. return } retry: r.tryOnce() if bool(debug) && r.isRetry() { debug.Printf("cli(%s) retry request tryCnt=%d %v\n", c.RemoteAddr(), r.tryCnt, &r) } if sv, err = c.getServerConn(&r); err != nil { if debug { debug.Printf("cli(%s) failed to get server conn %v\n", c.RemoteAddr(), &r) } // Failed connection will send error page back to the client. // For CONNECT, the client read buffer is released in copyClient2Server, // so can't go back to getRequest. if err == errPageSent && !r.isConnect { if r.hasBody() { // skip request body debug.Printf("cli(%s) skip request body %v\n", c.RemoteAddr(), &r) sendBody(SinkWriter{}, c.bufRd, int(r.ContLen), r.Chunking) } continue } return } if r.isConnect { // server connection will be closed in doConnect err = sv.doConnect(&r, c) if c.shouldRetry(&r, sv, err) { goto retry } // debug.Printf("doConnect %s to %s done\n", c.RemoteAddr(), r.URL.HostPort) return } if err = sv.doRequest(c, &r, &rp); err != nil { // For client I/O error, we can actually put server connection to // pool. But let's make thing simple for now. sv.Close() if c.shouldRetry(&r, sv, err) { goto retry } else if err == errPageSent && (!r.hasBody() || r.hasSent()) { // Can only continue if request has no body, or request body // has been read. 
continue } return } // Put server connection to pool, so other clients can use it. _, isCowConn := sv.Conn.(cowConn) if rp.ConnectionKeepAlive || isCowConn { if debug { debug.Printf("cli(%s) connPool put %s", c.RemoteAddr(), sv.hostPort) } // If the server connection is not going to be used soon, // release buffer before putting back to pool can save memory. sv.releaseBuf() connPool.Put(sv) } else { if debug { debug.Printf("cli(%s) server %s close conn\n", c.RemoteAddr(), sv.hostPort) } sv.Close() } if !r.ConnectionKeepAlive { if debug { debug.Printf("cli(%s) close connection\n", c.RemoteAddr()) } return } } } func genErrMsg(r *Request, sv *serverConn, what string) string { if sv == nil { return fmt.Sprintf("

HTTP Request %v

%s

", r, what) } return fmt.Sprintf("

HTTP Request %v

%s

Using %s.

", r, what, sv.Conn) } func (c *clientConn) handleBlockedRequest(r *Request, err error) error { siteStat.TempBlocked(r.URL) return RetryError{err} } func (c *clientConn) handleServerReadError(r *Request, sv *serverConn, err error, msg string) error { if debug { debug.Printf("cli(%s) server read error %s %T %v %v\n", c.RemoteAddr(), msg, err, err, r) } if err == io.EOF { return RetryError{err} } if sv.maybeFake() && maybeBlocked(err) { return c.handleBlockedRequest(r, err) } if r.responseNotSent() { sendErrorPage(c, "502 read error", err.Error(), genErrMsg(r, sv, msg)) return errPageSent } errl.Printf("cli(%s) unhandled server read error %s %v %s\n", c.RemoteAddr(), msg, err, r) return err } func (c *clientConn) handleServerWriteError(r *Request, sv *serverConn, err error, msg string) error { // This function is only called in doRequest, no response is sent to client. // So if visiting blocked site, can always retry request. if sv.maybeFake() && isErrConnReset(err) { siteStat.TempBlocked(r.URL) } return RetryError{err} } func dbgPrintRep(c *clientConn, r *Request, rp *Response) { if rp.Trailer { errl.Printf("cli(%s) response %s has Trailer header\n%s", c.RemoteAddr(), rp, rp.Verbose()) } if dbgRep { if verbose { dbgRep.Printf("cli(%s) response %s %s\n%s", c.RemoteAddr(), r, rp, rp.Verbose()) } else { dbgRep.Printf("cli(%s) response %s %s\n", c.RemoteAddr(), r, rp) } } } func (c *clientConn) readResponse(sv *serverConn, r *Request, rp *Response) (err error) { sv.initBuf() defer func() { rp.releaseBuf() }() /* if r.partial { return RetryError{errors.New("debug retry for partial request")} } */ /* // force retry for debugging if r.tryCnt == 1 { return RetryError{errors.New("debug retry in readResponse")} } */ if err = parseResponse(sv, r, rp); err != nil { return c.handleServerReadError(r, sv, err, "parse response") } dbgPrintRep(c, r, rp) // After have received the first reponses from the server, we consider // ther server as real instead of fake one caused by wrong 
DNS reply. So // don't time out later. sv.state = svSendRecvResponse r.state = rsRecvBody r.releaseBuf() if _, err = c.Write(rp.rawResponse()); err != nil { return err } rp.releaseBuf() if rp.hasBody(r.Method) { if err = sendBody(c, sv.bufRd, int(rp.ContLen), rp.Chunking); err != nil { if debug { debug.Printf("cli(%s) send body %v\n", c.RemoteAddr(), err) } // Non persistent connection will return nil upon successful response reading if err == io.EOF { // For persistent connection, EOF from server is error. // Response header has been read, server using persistent // connection indicates the end of response and proxy should // not got EOF while reading response. // The client connection will be closed to indicate this error. // Proxy can't send error page here because response header has // been sent. return fmt.Errorf("read response body unexpected EOF %v", rp) } else if isErrOpRead(err) { return c.handleServerReadError(r, sv, err, "read response body") } // errl.Printf("cli(%s) sendBody error %T %v %v", err, err, r) return err } } r.state = rsDone /* if debug { debug.Printf("[Finished] %v request %s %s\n", c.RemoteAddr(), r.Method, r.URL) } */ if rp.ConnectionKeepAlive { if rp.KeepAlive == time.Duration(0) { sv.willCloseOn = time.Now().Add(defaultServerConnTimeout) } else { // debug.Printf("cli(%s) server %s keep-alive %v\n", c.RemoteAddr(), sv.hostPort, rp.KeepAlive) sv.willCloseOn = time.Now().Add(rp.KeepAlive) } } return } func (c *clientConn) getServerConn(r *Request) (*serverConn, error) { siteInfo := siteStat.GetVisitCnt(r.URL) // For CONNECT method, always create new connection. if r.isConnect { return c.createServerConn(r, siteInfo) } sv := connPool.Get(r.URL.HostPort, siteInfo.AsDirect()) if sv != nil { // For websites like feedly, the site itself is not blocked, but the // content it loads may result reset. So we should reset server // connection state to just connected. 
sv.state = svConnected if debug { debug.Printf("cli(%s) connPool get %s\n", c.RemoteAddr(), r.URL.HostPort) } return sv, nil } if debug { debug.Printf("cli(%s) connPool no conn %s", c.RemoteAddr(), r.URL.HostPort) } return c.createServerConn(r, siteInfo) } func connectDirect2(url *URL, siteInfo *VisitCnt, recursive bool) (net.Conn, error) { var c net.Conn var err error if siteInfo.AlwaysDirect() { c, err = net.Dial("tcp", url.HostPort) } else { to := dialTimeout if siteInfo.OnceBlocked() && to >= defaultDialTimeout { // If once blocked, decrease timeout to switch to parent proxy faster. to = minDialTimeout } else if siteInfo.AsDirect() { // If usually can be accessed directly, increase timeout to avoid // problems when network condition is bad. to = maxTimeout } c, err = net.DialTimeout("tcp", url.HostPort, to) } if err != nil { debug.Printf("error direct connect to: %s %v\n", url.HostPort, err) if isErrTooManyOpenFd(err) && !recursive { return connectDirect2(url, siteInfo, true) } return nil, err } // debug.Println("directly connected to", url.HostPort) return directConn{c}, nil } func connectDirect(url *URL, siteInfo *VisitCnt) (net.Conn, error) { return connectDirect2(url, siteInfo, false) } func isErrTimeout(err error) bool { if ne, ok := err.(net.Error); ok { return ne.Timeout() } return false } func isHttpErrCode(err error) bool { if config.HttpErrorCode <= 0 { return false } if err == CustomHttpErr { return true } return false } func maybeBlocked(err error) bool { if parentProxy.empty() { return false } return isErrTimeout(err) || isErrConnReset(err) || isHttpErrCode(err) } // Connect to requested server according to whether it's visit count. // If direct connection fails, try parent proxies. 
func (c *clientConn) connect(r *Request, siteInfo *VisitCnt) (srvconn net.Conn, err error) { var errMsg string if config.AlwaysProxy { if srvconn, err = parentProxy.connect(r.URL); err == nil { return } errMsg = genErrMsg(r, nil, "Parent proxy connection failed, always use parent proxy.") goto fail } if siteInfo.AsBlocked() && !parentProxy.empty() { // In case of connection error to socks server, fallback to direct connection if srvconn, err = parentProxy.connect(r.URL); err == nil { return } if siteInfo.AlwaysBlocked() { errMsg = genErrMsg(r, nil, "Parent proxy connection failed, always blocked site.") goto fail } if siteInfo.AsTempBlocked() { errMsg = genErrMsg(r, nil, "Parent proxy connection failed, temporarily blocked site.") goto fail } if srvconn, err = connectDirect(r.URL, siteInfo); err == nil { return } errMsg = genErrMsg(r, nil, "Parent proxy and direct connection failed, maybe blocked site.") } else { // In case of error on direction connection, try parent server if srvconn, err = connectDirect(r.URL, siteInfo); err == nil { return } if parentProxy.empty() { errMsg = genErrMsg(r, nil, "Direct connection failed, no parent proxy.") goto fail } if siteInfo.AlwaysDirect() { errMsg = genErrMsg(r, nil, "Direct connection failed, always direct site.") goto fail } // net.Dial does two things: DNS lookup and TCP connection. // GFW may cause failure here: make it time out or reset connection. // debug.Printf("type of err %T %v\n", err, err) // RST during TCP handshake is valid and would return as connection // refused error. My observation is that GFW does not use RST to stop // TCP handshake. // To simplify things and avoid error in my observation, always try // parent proxy in case of Dial error. 
var socksErr error if srvconn, socksErr = parentProxy.connect(r.URL); socksErr == nil { c.handleBlockedRequest(r, err) if debug { debug.Printf("cli(%s) direct connection failed, use parent proxy for %v\n", c.RemoteAddr(), r) } return srvconn, nil } errMsg = genErrMsg(r, nil, "Direct and parent proxy connection failed, maybe blocked site.") } fail: sendErrorPage(c, "504 Connection failed", err.Error(), errMsg) return nil, errPageSent } func (c *clientConn) createServerConn(r *Request, siteInfo *VisitCnt) (*serverConn, error) { srvconn, err := c.connect(r, siteInfo) if err != nil { return nil, err } sv := newServerConn(srvconn, r.URL.HostPort, siteInfo) if debug { debug.Printf("cli(%s) connected to %s %d concurrent connections\n", c.RemoteAddr(), sv.hostPort, incSrvConnCnt(sv.hostPort)) } return sv, nil } // Should call initBuf before reading http response from server. This allows // us not init buf for connect method which does not need to parse http // respnose. func newServerConn(c net.Conn, hostPort string, siteInfo *VisitCnt) *serverConn { sv := &serverConn{ Conn: c, hostPort: hostPort, siteInfo: siteInfo, } return sv } func (sv *serverConn) isDirect() bool { _, ok := sv.Conn.(directConn) return ok } func (sv *serverConn) updateVisit() { if sv.visited { return } sv.visited = true if sv.isDirect() { sv.siteInfo.DirectVisit() } else { sv.siteInfo.BlockedVisit() } } func (sv *serverConn) initBuf() { if sv.bufRd == nil { sv.buf = httpBuf.Get() sv.bufRd = bufio.NewReaderFromBuf(sv, sv.buf) } } func (sv *serverConn) releaseBuf() { if sv.bufRd != nil { // debug.Println("release server buffer") httpBuf.Put(sv.buf) sv.buf = nil sv.bufRd = nil } } func (sv *serverConn) Close() error { sv.releaseBuf() if debug { debug.Printf("close connection to %s remains %d concurrent connections\n", sv.hostPort, decSrvConnCnt(sv.hostPort)) } return sv.Conn.Close() } func (sv *serverConn) maybeFake() bool { return sv.state == svConnected && sv.isDirect() && !sv.siteInfo.AlwaysDirect() } 
func setConnReadTimeout(cn net.Conn, d time.Duration, msg string) { if err := cn.SetReadDeadline(time.Now().Add(d)); err != nil { errl.Println("set readtimeout:", msg, err) } } func unsetConnReadTimeout(cn net.Conn, msg string) { if err := cn.SetReadDeadline(zeroTime); err != nil { // It's possible that conn has been closed, so use debug log. debug.Println("unset readtimeout:", msg, err) } } func (sv *serverConn) setReadTimeout(msg string) { to := readTimeout if sv.siteInfo.OnceBlocked() && to > defaultReadTimeout { to = minReadTimeout } else if sv.siteInfo.AsDirect() { to = maxTimeout } setConnReadTimeout(sv.Conn, to, msg) } func (sv *serverConn) unsetReadTimeout(msg string) { unsetConnReadTimeout(sv.Conn, msg) } func (sv *serverConn) maybeSSLErr(cliStart time.Time) bool { // If client closes connection very soon, maybe there's SSL error, maybe // not (e.g. user stopped request). // COW can't tell which is the case, so this detection is not reliable. return sv.state > svConnected && time.Now().Sub(cliStart) < sslLeastDuration } func (sv *serverConn) mayBeClosed() bool { if _, ok := sv.Conn.(cowConn); ok { debug.Println("cow parent would keep alive") return false } return time.Now().After(sv.willCloseOn) } // Use smaller buffer for connection method as the buffer will be hold for a // very long time. const connectBufSize = 4096 // Hold at most 2M memory for connection buffer. This can support 256 // concurrent connect method. 
var connectBuf = leakybuf.NewLeakyBuf(512, connectBufSize) func copyServer2Client(sv *serverConn, c *clientConn, r *Request) (err error) { buf := connectBuf.Get() defer func() { connectBuf.Put(buf) }() /* // force retry for debugging if r.tryCnt == 1 && sv.maybeFake() { time.Sleep(1) return RetryError{errors.New("debug retry in copyServer2Client")} } */ total := 0 const directThreshold = 8192 readTimeoutSet := false for { // debug.Println("srv->cli") if sv.maybeFake() { sv.setReadTimeout("srv->cli") readTimeoutSet = true } else if readTimeoutSet { sv.unsetReadTimeout("srv->cli") readTimeoutSet = false } var n int if n, err = sv.Read(buf); err != nil { if sv.maybeFake() && maybeBlocked(err) { siteStat.TempBlocked(r.URL) debug.Printf("srv->cli blocked site %s detected, err: %v retry\n", r.URL.HostPort, err) return RetryError{err} } // Expected error besides EOF: "use of closed network connection", // this is to make blocking read return. // debug.Printf("copyServer2Client read data: %v\n", err) return } total += n if _, err = c.Write(buf[0:n]); err != nil { // debug.Printf("copyServer2Client write data: %v\n", err) return } // debug.Printf("srv(%s)->cli(%s) sent %d bytes data\n", r.URL.HostPort, c.RemoteAddr(), total) // set state to rsRecvBody to indicate the request has partial response sent to client r.state = rsRecvBody sv.state = svSendRecvResponse if total > directThreshold { sv.updateVisit() } } } type serverWriter struct { rq *Request sv *serverConn } func newServerWriter(r *Request, sv *serverConn) *serverWriter { return &serverWriter{r, sv} } // Write to server, store written data in request buffer if necessary. // We have to save request body in order to retry request. // FIXME: too tighly coupled with Request. func (sw *serverWriter) Write(p []byte) (int, error) { if sw.rq.raw == nil { // buffer released } else if sw.rq.raw.Len() >= 2*httpBufSize { // Avoid using too much memory to hold request body. 
If a request is // not buffered completely, COW can't retry and can release memory // immediately. debug.Println(sw.rq, "request body too large, not buffering any more") sw.rq.releaseBuf() sw.rq.partial = true } else if sw.rq.responseNotSent() { sw.rq.raw.Write(p) } else { // has sent response, happens when saving data for CONNECT method sw.rq.releaseBuf() } return sw.sv.Write(p) } func copyClient2Server(c *clientConn, sv *serverConn, r *Request, srvStopped notification, done chan struct{}) (err error) { // sv.maybeFake may change during execution in this function. // So need a variable to record the whether timeout is set. deadlineIsSet := false defer func() { if deadlineIsSet { // May need to retry, unset timeout here to avoid read client // timeout on retry. Note c.Conn maybe closed when calling this. unsetConnReadTimeout(c.Conn, "cli->srv after err") } close(done) }() var n int if r.isRetry() { if debug { debug.Printf("cli(%s)->srv(%s) retry request %d bytes of buffered body\n", c.RemoteAddr(), r.URL.HostPort, len(r.rawBody())) } if _, err = sv.Write(r.rawBody()); err != nil { debug.Println("cli->srv send to server error") return } } w := newServerWriter(r, sv) if c.bufRd != nil { n = c.bufRd.Buffered() if n > 0 { buffered, _ := c.bufRd.Peek(n) // should not return error if _, err = w.Write(buffered); err != nil { // debug.Printf("cli->srv write buffered err: %v\n", err) return } } if debug { debug.Printf("cli(%s)->srv(%s) released read buffer\n", c.RemoteAddr(), r.URL.HostPort) } c.releaseBuf() } var start time.Time if config.DetectSSLErr { start = time.Now() } buf := connectBuf.Get() defer func() { connectBuf.Put(buf) }() for { // debug.Println("cli->srv") if sv.maybeFake() { setConnReadTimeout(c.Conn, time.Second, "cli->srv") deadlineIsSet = true } else if deadlineIsSet { // maybeFake may trun to false after timeout, but timeout should be unset unsetConnReadTimeout(c.Conn, "cli->srv before read") deadlineIsSet = false } if n, err = c.Read(buf); err != nil { 
if config.DetectSSLErr && sv.maybeFake() && (isErrConnReset(err) || err == io.EOF) && sv.maybeSSLErr(start) { debug.Println("client connection closed very soon, taken as SSL error:", r) siteStat.TempBlocked(r.URL) } else if isErrTimeout(err) && !srvStopped.hasNotified() { // debug.Printf("cli(%s)->srv(%s) timeout\n", c.RemoteAddr(), r.URL.HostPort) continue } // debug.Printf("cli->srv read err: %v\n", err) return } // copyServer2Client will detect write to closed server. Just store client content for retry. if _, err = w.Write(buf[:n]); err != nil { // XXX is it enough to only do block detection in copyServer2Client? /* if sv.maybeFake() && isErrConnReset(err) { siteStat.TempBlocked(r.URL) errl.Printf("copyClient2Server blocked site %d detected, retry\n", r.URL.HostPort) return RetryError{err} } */ // debug.Printf("cli->srv write err: %v\n", err) return } // debug.Printf("cli(%s)->srv(%s) sent %d bytes data\n", c.RemoteAddr(), r.URL.HostPort, n) } } var connEstablished = []byte("HTTP/1.1 200 Tunnel established\r\n\r\n") // Do HTTP CONNECT func (sv *serverConn) doConnect(r *Request, c *clientConn) (err error) { r.state = rsCreated _, isHttpConn := sv.Conn.(httpConn) _, isCowConn := sv.Conn.(cowConn) if isHttpConn || isCowConn { if debug { debug.Printf("cli(%s) send CONNECT request to parent\n", c.RemoteAddr()) } if err = sv.sendHTTPProxyRequestHeader(r, c); err != nil { debug.Printf("cli(%s) error send CONNECT request to parent: %v\n", c.RemoteAddr(), err) return err } } else if !r.isRetry() { // debug.Printf("send connection confirmation to %s->%s\n", c.RemoteAddr(), r.URL.HostPort) if _, err = c.Write(connEstablished); err != nil { debug.Printf("cli(%s) error send 200 Connecion established: %v\n", c.RemoteAddr(), err) return err } } var cli2srvErr error done := make(chan struct{}) srvStopped := newNotification() go func() { // debug.Printf("doConnect: cli(%s)->srv(%s)\n", c.RemoteAddr(), r.URL.HostPort) cli2srvErr = copyClient2Server(c, sv, r, srvStopped, done) // 
Close sv to force read from server in copyServer2Client return. // Note: there's no other code closing the server connection for CONNECT. sv.Close() }() // debug.Printf("doConnect: srv(%s)->cli(%s)\n", r.URL.HostPort, c.RemoteAddr()) err = copyServer2Client(sv, c, r) if isErrRetry(err) { srvStopped.notify() <-done // debug.Printf("doConnect: cli(%s)->srv(%s) stopped\n", c.RemoteAddr(), r.URL.HostPort) } else { // close client connection to force read from client in copyClient2Server return c.Conn.Close() } if isErrRetry(cli2srvErr) { return cli2srvErr } return } func (sv *serverConn) sendHTTPProxyRequestHeader(r *Request, c *clientConn) (err error) { if _, err = sv.Write(r.proxyRequestLine()); err != nil { return c.handleServerWriteError(r, sv, err, "send proxy request line to http parent") } if hc, ok := sv.Conn.(httpConn); ok && hc.parent.authHeader != nil { // Add authorization header for parent http proxy if _, err = sv.Write(hc.parent.authHeader); err != nil { return c.handleServerWriteError(r, sv, err, "send proxy authorization header to http parent") } } // When retry, body is in raw buffer. if _, err = sv.Write(r.rawHeaderBody()); err != nil { return c.handleServerWriteError(r, sv, err, "send proxy request header to http parent") } /* if bool(dbgRq) && verbose { debug.Printf("request to http proxy:\n%s%s", r.proxyRequestLine(), r.rawHeaderBody()) } */ return } func (sv *serverConn) sendRequestHeader(r *Request, c *clientConn) (err error) { // Send request to the server switch sv.Conn.(type) { case httpConn, cowConn: return sv.sendHTTPProxyRequestHeader(r, c) } /* if bool(debug) && verbose { debug.Printf("request to server\n%s", r.rawRequest()) } */ if _, err = sv.Write(r.rawRequest()); err != nil { err = c.handleServerWriteError(r, sv, err, "send request to server") } return } func (sv *serverConn) sendRequestBody(r *Request, c *clientConn) (err error) { // Send request body. 
If this is retry, r.raw contains request body and is // sent while sending raw request. if !r.hasBody() || r.isRetry() { return } err = sendBody(newServerWriter(r, sv), c.bufRd, int(r.ContLen), r.Chunking) if err != nil { errl.Printf("cli(%s) send request body error %v %s\n", c.RemoteAddr(), err, r) if isErrOpWrite(err) { err = c.handleServerWriteError(r, sv, err, "send request body") } return } if debug { debug.Printf("cli(%s) request body sent %s\n", c.RemoteAddr(), r) } return } // Do HTTP request other that CONNECT func (sv *serverConn) doRequest(c *clientConn, r *Request, rp *Response) (err error) { r.state = rsCreated if err = sv.sendRequestHeader(r, c); err != nil { return } if err = sv.sendRequestBody(r, c); err != nil { return } r.state = rsSent if err = c.readResponse(sv, r, rp); err == nil { sv.updateVisit() } return err } // Send response body if header specifies content length func sendBodyWithContLen(w io.Writer, r *bufio.Reader, contLen int) (err error) { // debug.Println("Sending body with content length", contLen) if contLen == 0 { return } if err = copyN(w, r, contLen, httpBufSize); err != nil { debug.Println("sendBodyWithContLen error:", err) } return } // Use this function until we find Trailer headers actually in use. func skipTrailer(r *bufio.Reader) error { // It's possible to get trailer headers, but the body will always end with // a line with just CRLF. 
for { s, err := r.ReadSlice('\n') if err != nil { errl.Println("skip trailer:", err) return err } if len(s) == 2 && s[0] == '\r' && s[1] == '\n' { return nil } errl.Printf("skip trailer: %#v", string(s)) if len(s) == 1 || len(s) == 2 { return fmt.Errorf("malformed chunk body end: %#v", string(s)) } } } func skipCRLF(r *bufio.Reader) (err error) { var buf [2]byte if _, err = io.ReadFull(r, buf[:]); err != nil { errl.Println("skip chunk body end:", err) return } if buf[0] != '\r' || buf[1] != '\n' { return fmt.Errorf("malformed chunk body end: %#v", string(buf[:])) } return } // Send response body if header specifies chunked encoding. rdSize specifies // the size of each read on Reader, it should be set to be the buffer size of // the Reader, this parameter is added for testing. func sendBodyChunked(w io.Writer, r *bufio.Reader, rdSize int) (err error) { // debug.Println("Sending chunked body") for { var s []byte // Read chunk size line, ignore chunk extension if any. if s, err = r.PeekSlice('\n'); err != nil { errl.Println("peek chunk size:", err) return } smid := bytes.IndexByte(s, ';') if smid == -1 { smid = len(s) } else { // use error log to find usage of chunk extension errl.Printf("got chunk extension: %s\n", s) } var size int64 if size, err = ParseIntFromBytes(TrimSpace(s[:smid]), 16); err != nil { errl.Println("chunk size invalid:", err) return } /* if debug { // To debug getting malformed response status line with "0\r\n". if c, ok := w.(*clientConn); ok { debug.Printf("cli(%s) chunk size %d %#v\n", c.RemoteAddr(), size, string(s)) } } */ if size == 0 { r.Skip(len(s)) if err = skipCRLF(r); err != nil { return } if _, err = w.Write([]byte(chunkEnd)); err != nil { debug.Println("send chunk ending:", err) } return } // RFC 2616 19.3 only suggest tolerating single LF for // headers, not for chunked encoding. So assume the server will send // CRLF. If not, the following parse int may find errors. 
total := len(s) + int(size) + 2 // total data size for this chunk, including ending CRLF // PeekSlice will not advance reader, so we can just copy total sized data. if err = copyN(w, r, total, rdSize); err != nil { debug.Println("copy chunked data:", err) return } } } const chunkEnd = "0\r\n\r\n" func sendBodySplitIntoChunk(w io.Writer, r *bufio.Reader) (err error) { // debug.Printf("sendBodySplitIntoChunk called\n") var b []byte for { b, err = r.ReadNext() // debug.Println("split into chunk n =", n, "err =", err) if err != nil { if err == io.EOF { // EOF is expected here as the server is closing connection. // debug.Println("end chunked encoding") _, err = w.Write([]byte(chunkEnd)) if err != nil { debug.Println("write chunk end 0", err) } return } debug.Println("read error in sendBodySplitIntoChunk", err) return } chunkSize := []byte(fmt.Sprintf("%x\r\n", len(b))) if _, err = w.Write(chunkSize); err != nil { debug.Printf("write chunk size %v\n", err) return } if _, err = w.Write(b); err != nil { debug.Println("write chunk data:", err) return } if _, err = w.Write([]byte(CRLF)); err != nil { debug.Println("write chunk ending CRLF:", err) return } } } // Send message body. func sendBody(w io.Writer, bufRd *bufio.Reader, contLen int, chunk bool) (err error) { // chunked encoding has precedence over content length // COW does not sanitize response header, but can correctly handle it if chunk { err = sendBodyChunked(w, bufRd, httpBufSize) } else if contLen >= 0 { // It's possible to have content length 0 if server response has no // body. err = sendBodyWithContLen(w, bufRd, int(contLen)) } else { // Must be reading server response here, because sendBody is called in // reading response iff chunked or content length > 0. 
err = sendBodySplitIntoChunk(w, bufRd) } return } ================================================ FILE: proxy_test.go ================================================ package main import ( "bytes" "github.com/cyfdecyf/bufio" "strings" "testing" ) func TestSendBodyChunked(t *testing.T) { testData := []struct { raw string want string // empty means same as raw }{ {"1a; ignore-stuff-here\r\nabcdefghijklmnopqrstuvwxyz\r\n10\r\n1234567890abcdef\r\n0\r\n\r\n", ""}, {"0\r\n\r\n", ""}, /* {"0\n\r\n", "0\r\n\r\n"}, // test for buggy web servers {"1a; ignore-stuff-here\nabcdefghijklmnopqrstuvwxyz\r\n10\n1234567890abcdef\n0\n\n", // COW will only sanitize CRLF at chunk ending "1a; ignore-stuff-here\nabcdefghijklmnopqrstuvwxyz\r\n10\n1234567890abcdef\r\n0\r\n\r\n"}, */ } // supress error log when finding chunk extension errl = false defer func() { errl = true }() // use different reader buffer size to test for both all buffered and partially buffered chunk sizeArr := []int{32, 64, 128} for _, size := range sizeArr { for _, td := range testData { r := bufio.NewReaderSize(strings.NewReader(td.raw), size) w := new(bytes.Buffer) if err := sendBodyChunked(w, r, size); err != nil { t.Fatalf("sent data %q err: %v\n", w.Bytes(), err) } if td.want == "" { if w.String() != td.raw { t.Errorf("sendBodyChunked wrong with buf size %d, raw data is:\n%q\ngot:\n%q\n", size, td.raw, w.String()) } } else { if w.String() != td.want { t.Errorf("sendBodyChunked wrong with buf sizwe %d, raw data is:\n%q\nwant:\n%q\ngot :\n%q\n", size, td.raw, td.want, w.String()) } } } } } func TestInitSelfListenAddr(t *testing.T) { listenProxy = []Proxy{newHttpProxy("0.0.0.0:7777", "")} initSelfListenAddr() testData := []struct { r Request self bool }{ {Request{Header: Header{Host: "google.com:443"}, URL: &URL{}}, false}, {Request{Header: Header{Host: "localhost"}, URL: &URL{}}, true}, {Request{Header: Header{Host: "127.0.0.1:7777"}, URL: &URL{}}, true}, {Request{Header: Header{Host: ""}, URL: &URL{HostPort: 
"google.com"}}, false}, {Request{Header: Header{Host: "localhost"}, URL: &URL{HostPort: "google.com"}}, false}, } for _, td := range testData { if isSelfRequest(&td.r) != td.self { t.Error(td.r.Host, "isSelfRequest should be", td.self) } if td.self && td.r.URL.Host == "" { t.Error("isSelfRequest should set url host", td.r.Header.Host) } } // Another set of listen addr. listenProxy = []Proxy{ newHttpProxy("192.168.1.1:7777", ""), newHttpProxy("127.0.0.1:8888", ""), } initSelfListenAddr() testData2 := []struct { r Request self bool }{ {Request{Header: Header{Host: "google.com:443"}, URL: &URL{}}, false}, {Request{Header: Header{Host: "localhost"}, URL: &URL{}}, true}, {Request{Header: Header{Host: "127.0.0.1:8888"}, URL: &URL{}}, true}, {Request{Header: Header{Host: "192.168.1.1"}, URL: &URL{}}, true}, {Request{Header: Header{Host: "192.168.1.2"}, URL: &URL{}}, false}, {Request{Header: Header{Host: ""}, URL: &URL{HostPort: "google.com"}}, false}, {Request{Header: Header{Host: "localhost"}, URL: &URL{HostPort: "google.com"}}, false}, } for _, td := range testData2 { if isSelfRequest(&td.r) != td.self { t.Error(td.r.Host, "isSelfRequest should be", td.self) } if td.self && td.r.URL.Host == "" { t.Error("isSelfRequest should set url host", td.r.Header.Host) } } } ================================================ FILE: proxy_unix.go ================================================ // +build darwin freebsd linux netbsd openbsd package main import ( "net" "syscall" "strings" ) func isErrConnReset(err error) bool { if ne, ok := err.(*net.OpError); ok { return strings.Contains(ne.Err.Error(), syscall.ECONNRESET.Error()) } return false } func isDNSError(err error) bool { if _, ok := err.(*net.DNSError); ok { return true } return false } func isErrOpWrite(err error) bool { ne, ok := err.(*net.OpError) if !ok { return false } return ne.Op == "write" } func isErrOpRead(err error) bool { ne, ok := err.(*net.OpError) if !ok { return false } return ne.Op == "read" } func 
isErrTooManyOpenFd(err error) bool { if ne, ok := err.(*net.OpError); ok && (ne.Err == syscall.EMFILE || ne.Err == syscall.ENFILE) { errl.Println("too many open fd") return true } return false } ================================================ FILE: proxy_windows.go ================================================ package main import ( "fmt" "net" "strings" "syscall" ) var _ = fmt.Println func isErrConnReset(err error) bool { // fmt.Printf("calling isErrConnReset for err type: %v Error() %s\n", // reflect.TypeOf(err), err.Error()) if ne, ok := err.(*net.OpError); ok { // fmt.Println("isErrConnReset net.OpError.Err type:", reflect.TypeOf(ne)) if errno, enok := ne.Err.(syscall.Errno); enok { // I got these number by print. Only tested on XP. // fmt.Printf("isErrConnReset errno: %d\n", errno) return errno == 64 || errno == 10054 } } return false } func isDNSError(err error) bool { /* fmt.Printf("calling isDNSError for err type: %v %s\n", reflect.TypeOf(err), err.Error()) */ // DNS error are not of type DNSError on Windows, so I used this ugly // hack. errMsg := err.Error() return strings.Contains(errMsg, "No such host") || strings.Contains(errMsg, "GetAddrInfoW") || strings.Contains(errMsg, "dial tcp") } func isErrOpWrite(err error) bool { ne, ok := err.(*net.OpError) if !ok { return false } return ne.Op == "WSASend" } func isErrOpRead(err error) bool { ne, ok := err.(*net.OpError) if !ok { return false } return ne.Op == "WSARecv" } func isErrTooManyOpenFd(err error) bool { // TODO implement this. return false } ================================================ FILE: script/README.md ================================================ # About cow-taskbar.exe Copied `goagent.exe`, modified the string table and icon using reshack. Thanks for the taskbar project created by @phuslu. # About cow-hide.exe Allow you to run COW as a background process, without any notifications. Provided by @xupefei's [cow-hide](https://github.com/xupefei/cow-hide) project. 
Icon from [IconArchive](http://www.iconarchive.com/show/animal-icons-by-martin-berube/cow-icon.html), thanks to the author Martin Berube. ================================================ FILE: script/build.sh ================================================ #!/bin/bash cd "$( dirname "${BASH_SOURCE[0]}" )/.." version=`grep '^version=' ./install-cow.sh | sed -s 's/version=//'` echo "creating cow binary version $version" mkdir -p bin build() { local name local goos local goarch local goarm local cgo local armv goos="GOOS=$1" goarch="GOARCH=$2" arch=$3 if [[ $2 == "arm" ]]; then armv=`echo $arch | grep -o [0-9]` goarm="GOARM=$armv" fi if [[ $1 == "darwin" ]]; then # Enable CGO for OS X so change network location will not cause problem. cgo="CGO_ENABLED=1" else cgo="CGO_ENABLED=0" fi name=cow-$arch-$version echo "building $name" echo $cgo $goos $goarch $goarm go build eval $cgo $goos $goarch $goarm go build || exit 1 if [[ $1 == "windows" ]]; then mv cow.exe script pushd script sed -e 's/$/\r/' ../doc/sample-config/rc > rc.txt zip $name.zip cow.exe cow-taskbar.exe cow-hide.exe rc.txt rm -f cow.exe rc.txt mv $name.zip ../bin/ popd else mv cow bin/$name gzip -f bin/$name fi } build darwin amd64 mac64 #build darwin 386 mac32 build linux amd64 linux64 build linux 386 linux32 build linux arm linux-armv5tel build linux arm linux-armv6l build linux arm linux-armv7l build windows amd64 win64 build windows 386 win32 ================================================ FILE: script/debugrc ================================================ listen = cow://aes-128-cfb:foobar@127.0.0.1:8899 ================================================ FILE: script/httprc ================================================ listen = http://127.0.0.1:7788 proxy = cow://aes-128-cfb:foobar@127.0.0.1:8899 alwaysProxy = true ================================================ FILE: script/log-group-by-client.sh ================================================ #!/bin/bash if [[ $# != 1 ]]; then echo "Usage: $0 " 
exit 1 fi log=$1 #clients=`egrep 'cli\([^)]+\) connected, total' $log | cut -d ' ' -f 4` #for c in $clients; do #echo $c #done sort --stable --key 4,4 --key 3,3 $log | sed -e "/closed, total/s,\$,\n\n," > $log-grouped ================================================ FILE: script/set-version.sh ================================================ #!/bin/bash cd "$( dirname "${BASH_SOURCE[0]}" )/.." if [ $# != 1 ]; then echo "Usage: $0 " exit 1 fi version=$1 #echo $version sed -i -e "s,\(\tversion \+= \)\".*\"$,\1\"$version\"," config.go sed -i -e "s/version=.*$/version=$version/" install-cow.sh sed -i -e "s/当前版本:[^ ]\+ \(.*\)\$/当前版本:$version \1/" README.md sed -i -e "s/Current version: [^ ]\+ \(.*\)\$/Current version: $version \1/" README-en.md ================================================ FILE: script/test.sh ================================================ #!/bin/bash cd "$( dirname "${BASH_SOURCE[0]}" )/.." if ! go build; then echo "build failed" exit 1 fi PROXY_ADDR=127.0.0.1:7788 COW_ADDR=127.0.0.1:8899 if [[ -z "$TRAVIS" ]]; then RCDIR=~/.cow/ else # on travis RCDIR=./script/ fi ./cow -rc $RCDIR/debugrc -listen=cow://aes-128-cfb:foobar@$COW_ADDR & parent_pid=$! ./cow -rc ./script/httprc -listen=http://$PROXY_ADDR & cow_pid=$! stop_cow() { kill -SIGTERM $parent_pid kill -SIGTERM $cow_pid } trap 'stop_cow' TERM INT sleep 1 test_get() { local url url=$1 target=$2 noproxy=$3 code=$4 echo -n "GET $url " if [[ -z $code ]]; then code="200" fi # get 5 times for i in {1..2}; do # -s silent to disable progress meter, but enable --show-error # -i to include http header # -L to follow redirect so we should always get HTTP 200 if [[ -n $noproxy ]]; then cont=`curl -s --show-error -i -L $url 2>&1` else cont=`curl -s --show-error -i -L -x $PROXY_ADDR $url 2>&1` fi ok=`echo $cont | grep -E -o "HTTP/1\.1 +$code"` html=`echo $cont | grep -E -o -i "$target"` if [[ -z $ok || -z $html ]] ; then echo "==============================" echo "GET $url FAILED!!!" 
echo "$ok" echo "$html" echo $cont echo "==============================" kill -SIGTERM $cow_pid exit 1 fi sleep 0.3 done echo "passed" } test_get $PROXY_ADDR/pac "apple.com" "noproxy" # test for pac test_get google.com "" # blocked site, all kinds of block method test_get https://google.com "" # Sites that may timeout on travis. if [[ -z $TRAVIS ]]; then test_get plan9.bell-labs.com/magic/man2html/1/2l "" "" "404" # single LF in response header test_get www.wpxap.com "" # 301 redirect test_get www.taobao.com "" # chunked encoding, weird can't tests for in script test_get https://www.alipay.com "" fi stop_cow sleep 0.5 rm -f ./script/stat* exit 0 ================================================ FILE: script/upload.sh ================================================ #!/bin/bash cd "$( dirname "${BASH_SOURCE[0]}" )/.." if [[ $# != 2 ]]; then echo "upload.sh " exit 1 fi version=`grep '^version=' ./install-cow.sh | sed -s 's/version=//'` username=$1 passwd=$2 upload() { summary=$1 file=$2 googlecode_upload.py -l Featured -u "$username" -w "$passwd" -s "$summary" -p cow-proxy "$file" } upload "$version for Linux 32bit" bin/cow-linux32-$version.gz upload "$version for Linux 64bit" bin/cow-linux64-$version.gz upload "$version for Windows 64bit" bin/cow-win64-$version.zip upload "$version for Windows 32bit" bin/cow-win32-$version.zip upload "$version for OS X 64bit" bin/cow-mac64-$version.gz ================================================ FILE: site_blocked.go ================================================ package main var blockedDomainList = []string{ "bit.ly", "j.mp", "bitly.com", "fbcdn.net", "facebook.com", "plus.google.com", "plusone.google.com", "t.co", "twimg.com", "twitpic.com", "twitter.com", "youtu.be", "youtube.com", "ytimg.com", } ================================================ FILE: site_direct.go ================================================ package main var directDomainList = []string{ // 视频 "xunlei.com", // 迅雷 "kankan.com", "kanimg.com", "tdimg.com", 
// 土豆 "tudou.com", "tudouui.com", "soku.com", // 优酷 "youku.com", "ykimg.com", "ku6.cn", // 酷六 "ku6.com", "ku6cdn.com", "ku6img.com", // 电商 "z.cn", "amazon.cn", "360buy.com", "360buyimg.com", "jd.com", "51buy.com", "icson.com", "dangdang.com", "ddimg.cn", "yihaodian.com", "yihaodianimg.com", "paipai.com", "paipaiimg.com", "tmall.com", "taobao.com", "taobaocdn.com", "tbcdn.cn", "etao.com", "aicdn.com", "alicdn.com", "alimama.cn", "alimama.com", "alipay.com", "alipayobjects.com", // 银行 "bankcomm.com", "bankofchina.com", "95559.com.cn", "abchina.com", "95599.cn", "boc.cn", "ccb.com", "cmbchina.com", "icbc.com.cn", "spdb.com.cn", // 社交 "douban.com", "t.cn", "weibo.com", "zhihu.com", "kaixin001.com", "qq.com", "renren.com", "rrimg.com", "xiaonei.com", "xnimg.cn", "xnpic.com", "dianping.com", // 点评 "dpfile.com", "huaban.com", // 又拍云的几个 "yupoo.com", "upyun.com", "upaiyun.com", // 新闻门户 "ifanr.cn", "ifanr.com", "163.com", "hexun.com", "sina.com.cn", "sinaapp.com", "sinaimg.cn", "sinajs.cn", "sohu.com", "solidot.org", // 搜索 "bing.com", "bing.com.cn", "baidu.com", "bdstatic.com", "bdimg.com", "youdao.com", "sogou.com", // Apple "apple.com", "apple.com.cn", "icloud.com", // 其他 "macromedia.com", "mmcdn.cn", "12306.cn", } ================================================ FILE: sitestat.go ================================================ package main import ( "encoding/json" "errors" "fmt" "io/ioutil" "math/rand" "os" "strings" "sync" "time" "github.com/cyfdecyf/bufio" ) func init() { rand.Seed(time.Now().Unix()) } // VisitCnt and SiteStat are used to track how many times a site is visited. // With this information: COW knows which sites are frequently visited, and // judging whether a site is blocked or not is more reliable. 
const ( directDelta = 5 blockedDelta = 5 maxCnt = 100 // no protect to update visit cnt, smaller value is unlikely to overflow userCnt = -1 // this represents user specified host or domain ) type siteVisitMethod int // minus operation on visit count may get negative value, so use signed int type vcntint int8 type Date time.Time const dateLayout = "2006-01-02" func (d Date) MarshalJSON() ([]byte, error) { return []byte("\"" + time.Time(d).Format(dateLayout) + "\""), nil } func (d *Date) UnmarshalJSON(input []byte) error { if len(input) != len(dateLayout)+2 { return errors.New(fmt.Sprintf("unmarshaling date: invalid input %s", string(input))) } input = input[1 : len(dateLayout)+1] t, err := time.Parse(dateLayout, string(input)) *d = Date(t) return err } // COW don't need very accurate visit count, so update to visit count value is // not protected. type VisitCnt struct { Direct vcntint `json:"direct"` Blocked vcntint `json:"block"` Recent Date `json:"recent"` rUpdated bool // whether Recent is updated, we only need date precision blockedOn time.Time // when is the site last blocked } func newVisitCnt(direct, blocked vcntint) *VisitCnt { return &VisitCnt{direct, blocked, Date(time.Now()), true, zeroTime} } func newVisitCntWithTime(direct, blocked vcntint, t time.Time) *VisitCnt { return &VisitCnt{direct, blocked, Date(t), true, zeroTime} } func (vc *VisitCnt) userSpecified() bool { return vc.Blocked == userCnt || vc.Direct == userCnt } const siteStaleThreshold = 10 * 24 * time.Hour func (vc *VisitCnt) isStale() bool { return time.Now().Sub(time.Time(vc.Recent)) > siteStaleThreshold } // shouldNotSave returns true if the a VisitCnt is not visited for a long time // (several days) or is specified by user. 
func (vc *VisitCnt) shouldNotSave() bool { return vc.userSpecified() || vc.isStale() || (vc.Blocked == 0 && vc.Direct == 0) } const tmpBlockedTimeout = 2 * time.Minute func (vc *VisitCnt) AsTempBlocked() bool { return time.Now().Sub(vc.blockedOn) < tmpBlockedTimeout } func (vc *VisitCnt) AsDirect() bool { return (vc.Blocked == 0) || (vc.Direct-vc.Blocked >= directDelta) || vc.AlwaysDirect() } func (vc *VisitCnt) AsBlocked() bool { if vc.Blocked == userCnt || vc.AsTempBlocked() { return true } // add some randomness to fix mistake delta := vc.Blocked - vc.Direct return delta >= blockedDelta && rand.Intn(int(delta)) != 0 } func (vc *VisitCnt) AlwaysDirect() bool { return vc.Direct == userCnt } func (vc *VisitCnt) AlwaysBlocked() bool { return vc.Blocked == userCnt } func (vc *VisitCnt) OnceBlocked() bool { return vc.Blocked > 0 || vc.AlwaysBlocked() || vc.AsTempBlocked() } func (vc *VisitCnt) tempBlocked() { vc.blockedOn = time.Now() } // time.Time is composed of 3 fields, so need lock to protect update. As // update of last visit is not frequent (at most once for each domain), use a // global lock to avoid associating a lock to each VisitCnt. var visitLock sync.Mutex // visit updates visit cnt func (vc *VisitCnt) visit(inc *vcntint) { if *inc < maxCnt { *inc++ } // Because of concurrent update, possible for *inc to overflow and become // negative, but very unlikely. if *inc > maxCnt || *inc < 0 { *inc = maxCnt } if !vc.rUpdated { vc.rUpdated = true visitLock.Lock() vc.Recent = Date(time.Now()) visitLock.Unlock() } } func (vc *VisitCnt) DirectVisit() { if networkBad() || vc.userSpecified() { return } // one successful direct visit probably means the site is not actually // blocked vc.visit(&vc.Direct) vc.Blocked = 0 } func (vc *VisitCnt) BlockedVisit() { if networkBad() || vc.userSpecified() { return } // When a site changes from direct to blocked by GFW, COW should learn // this quickly and remove it from the PAC ASAP. 
So change direct to 0 // once there's a single blocked visit, this ensures the site is removed // upon the next PAC update. vc.visit(&vc.Blocked) vc.Direct = 0 } type SiteStat struct { Update Date `json:"update"` Vcnt map[string]*VisitCnt `json:"site_info"` // Vcnt uses host as key vcLock sync.RWMutex // Whether a domain has blocked host. Used to avoid considering a domain as // direct though it has blocked hosts. hasBlockedHost map[string]bool hbhLock sync.RWMutex } func newSiteStat() *SiteStat { return &SiteStat{ Vcnt: map[string]*VisitCnt{}, hasBlockedHost: map[string]bool{}, } } func (ss *SiteStat) get(s string) *VisitCnt { ss.vcLock.RLock() Vcnt, ok := ss.Vcnt[s] ss.vcLock.RUnlock() if ok { return Vcnt } return nil } func (ss *SiteStat) create(s string) (vcnt *VisitCnt) { vcnt = newVisitCnt(0, 0) ss.vcLock.Lock() ss.Vcnt[s] = vcnt ss.vcLock.Unlock() return } // Caller should guarantee that always direct url does not attempt // blocked visit. func (ss *SiteStat) TempBlocked(url *URL) { debug.Printf("%s temp blocked\n", url.Host) vcnt := ss.get(url.Host) if vcnt == nil { panic("TempBlocked should always get existing visitCnt") } vcnt.tempBlocked() // Mistakenly consider a partial blocked domain as direct will make that // domain into PAC and never have a chance to correct the error. // Once using blocked visit, a host is considered to maybe blocked even if // it's block visit count decrease to 0. As hasBlockedHost is not saved, // upon next start up of COW, the information will reflect the current // status of that host. 
ss.hbhLock.RLock() t := ss.hasBlockedHost[url.Domain] ss.hbhLock.RUnlock() if !t { ss.hbhLock.Lock() ss.hasBlockedHost[url.Domain] = true ss.hbhLock.Unlock() } } var alwaysDirectVisitCnt = newVisitCnt(userCnt, 0) func (ss *SiteStat) GetVisitCnt(url *URL) (vcnt *VisitCnt) { if parentProxy.empty() { // no way to retry, so always visit directly return alwaysDirectVisitCnt } if url.Domain == "" { // simple host or private ip return alwaysDirectVisitCnt } if vcnt = ss.get(url.Host); vcnt != nil { return } if len(url.Domain) != len(url.Host) { if dmcnt := ss.get(url.Domain); dmcnt != nil && dmcnt.userSpecified() { // if the domain is not specified by user, should create a new host // visitCnt return dmcnt } } return ss.create(url.Host) } func (ss *SiteStat) store(statPath string) (err error) { now := time.Now() var savedSS *SiteStat if ss.Update == Date(zeroTime) { ss.Update = Date(time.Now()) } if now.Sub(time.Time(ss.Update)) > siteStaleThreshold { // Not updated for a long time, don't drop any record savedSS = ss // Changing update time too fast will also drop useful record savedSS.Update = Date(time.Time(ss.Update).Add(siteStaleThreshold / 2)) if time.Time(savedSS.Update).After(now) { savedSS.Update = Date(now) } } else { savedSS = newSiteStat() savedSS.Update = Date(now) ss.vcLock.RLock() for site, vcnt := range ss.Vcnt { if vcnt.shouldNotSave() { continue } savedSS.Vcnt[site] = vcnt } ss.vcLock.RUnlock() } b, err := json.MarshalIndent(savedSS, "", "\t") if err != nil { errl.Println("Error marshalling site stat:", err) panic("internal error: error marshalling site") } // Store stat into temp file first and then rename. // Ensures atomic update to stat file to avoid file damage. // Create tmp file inside config firectory to avoid cross FS rename. 
f, err := ioutil.TempFile(config.dir, "stat") if err != nil { errl.Println("create tmp file to store stat", err) return } if _, err = f.Write(b); err != nil { errl.Println("Error writing stat file:", err) f.Close() return } f.Close() // Windows don't allow rename to existing file. os.Remove(statPath + ".bak") os.Rename(statPath, statPath+".bak") if err = os.Rename(f.Name(), statPath); err != nil { errl.Println("rename new stat file", err) return } return } func (ss *SiteStat) loadList(lst []string, direct, blocked vcntint) { for _, d := range lst { ss.Vcnt[d] = newVisitCntWithTime(direct, blocked, zeroTime) } } func (ss *SiteStat) loadBuiltinList() { ss.loadList(blockedDomainList, 0, userCnt) ss.loadList(directDomainList, userCnt, 0) } func (ss *SiteStat) loadUserList() { if directList, err := loadSiteList(config.DirectFile); err == nil { ss.loadList(directList, userCnt, 0) } if blockedList, err := loadSiteList(config.BlockedFile); err == nil { ss.loadList(blockedList, 0, userCnt) } } // Filter sites covered by user specified domains, also filter out stale // sites. func (ss *SiteStat) filterSites() { // It's not safe to remove element while iterating over a map. 
var removeSites []string // find what to remove first ss.vcLock.RLock() for site, vcnt := range ss.Vcnt { if vcnt.userSpecified() { continue } if vcnt.isStale() { removeSites = append(removeSites, site) continue } var dmcnt *VisitCnt domain := host2Domain(site) if domain != site { dmcnt = ss.get(domain) } if dmcnt != nil && dmcnt.userSpecified() { removeSites = append(removeSites, site) } } ss.vcLock.RUnlock() // do remove ss.vcLock.Lock() for _, site := range removeSites { delete(ss.Vcnt, site) } ss.vcLock.Unlock() } func (ss *SiteStat) load(file string) (err error) { defer func() { // load builtin list first, so user list can override builtin ss.loadBuiltinList() ss.loadUserList() ss.filterSites() for host, vcnt := range ss.Vcnt { if vcnt.OnceBlocked() { ss.hasBlockedHost[host2Domain(host)] = true } } }() if file == "" { return } if err = isFileExists(file); err != nil { if !os.IsNotExist(err) { errl.Println("Error loading stat:", err) } return } var f *os.File if f, err = os.Open(file); err != nil { errl.Printf("Error opening site stat %s: %v\n", file, err) return } defer f.Close() b, err := ioutil.ReadAll(f) if err != nil { errl.Println("Error reading site stat:", err) return } if err = json.Unmarshal(b, ss); err != nil { errl.Println("Error decoding site stat:", err) return } return } func (ss *SiteStat) GetDirectList() []string { lst := make([]string, 0) // anyway to do more fine grained locking? ss.vcLock.RLock() for site, vc := range ss.Vcnt { if ss.hasBlockedHost[host2Domain(site)] { continue } if vc.AsDirect() { lst = append(lst, site) } } ss.vcLock.RUnlock() return lst } var siteStat = newSiteStat() func initSiteStat() { err := siteStat.load(config.StatFile) if err != nil { // Simply try to load the stat.back, create a new object to avoid error // in default site list. 
siteStat = newSiteStat() err = siteStat.load(config.StatFile + ".bak") // After all its not critical , simply re-create a stat object if anything is not ok if err != nil { siteStat = newSiteStat() siteStat.load("") // load default site list } } // Dump site stat while running, so we don't always need to close cow to // get updated stat. go func() { for { time.Sleep(5 * time.Minute) storeSiteStat(siteStatCont) } }() } const ( siteStatExit = iota siteStatCont ) // Lock ensures only one goroutine calling store. // siteStatFini ensures no more calls after going to exit. var storeLock sync.Mutex var siteStatFini bool func storeSiteStat(cont byte) { storeLock.Lock() defer storeLock.Unlock() if siteStatFini { return } siteStat.store(config.StatFile) if cont == siteStatExit { siteStatFini = true } } func loadSiteList(fpath string) (lst []string, err error) { if fpath == "" { return } if err = isFileExists(fpath); err != nil { if !os.IsNotExist(err) { info.Printf("Error loading domaint list: %v\n", err) } return } f, err := os.Open(fpath) if err != nil { errl.Println("Error opening domain list:", err) return } defer f.Close() scanner := bufio.NewScanner(f) lst = make([]string, 0) for scanner.Scan() { site := strings.TrimSpace(scanner.Text()) if site == "" { continue } lst = append(lst, site) } if scanner.Err() != nil { errl.Printf("Error reading domain list %s: %v\n", fpath, scanner.Err()) } return lst, scanner.Err() } ================================================ FILE: sitestat_test.go ================================================ package main import ( "os" "testing" "time" ) var _ = os.Remove func TestNetworkBad(t *testing.T) { if networkBad() { t.Error("Network by default should be good") } } func TestDateMarshal(t *testing.T) { d := Date(time.Date(2013, 2, 4, 0, 0, 0, 0, time.UTC)) j, err := d.MarshalJSON() if err != nil { t.Error("Error marshalling json:", err) } if string(j) != "\"2013-02-04\"" { t.Error("Date marshal result wrong, got:", string(j)) } err = 
d.UnmarshalJSON([]byte("\"2013-01-01\"")) if err != nil { t.Error("Error unmarshaling Date:", err) } tm := time.Time(d) if tm.Year() != 2013 || tm.Month() != 1 || tm.Day() != 1 { t.Error("Unmarshaled date wrong, got:", tm) } } func TestSiteStatLoadStore(t *testing.T) { ss := newSiteStat() ss.load("testdata/nosuchfile") // load buildin and user specified list if len(ss.GetDirectList()) == 0 { t.Error("builtin site should appear in direct site list even with no stat file") } url1, _ := ParseRequestURI("www.foobar.com") url2, _ := ParseRequestURI("img.foobar.com") vcnt1 := ss.GetVisitCnt(url1) vcnt1.DirectVisit() vcnt1.DirectVisit() vcnt1.DirectVisit() vcnt2 := ss.GetVisitCnt(url2) vcnt2.DirectVisit() blockurl1, _ := ParseRequestURI("blocked.com") blockurl2, _ := ParseRequestURI("blockeurl2.com") si1 := ss.GetVisitCnt(blockurl1) si1.BlockedVisit() si2 := ss.GetVisitCnt(blockurl2) si2.BlockedVisit() // make google.com with a large direct count, but plus.google.com is in blocked list // so it shouldn't be considered as direct site gurl, _ := ParseRequestURI("google.com") gvcnt := ss.GetVisitCnt(gurl) gvcnt.Direct = 100 const stfile = "testdata/stat" if err := ss.store(stfile); err != nil { t.Fatal("store error:", err) } ld := newSiteStat() if err := ld.load(stfile); err != nil { t.Fatal("load stat error:", err) } vc := ld.get(url1.Host) if vc == nil { t.Fatalf("load error, %s not loaded\n", url1.Host) } if vc.Direct != 3 { t.Errorf("load error, %s should have visit cnt 3, got: %d\n", url1.Host, vc.Direct) } vc = ld.get(blockurl1.Host) if vc == nil { t.Errorf("load error, %s not loaded\n", blockurl1.Host) } // test bulitin site ap, _ := ParseRequestURI("apple.com") si := ld.GetVisitCnt(ap) if !si.AlwaysDirect() { t.Error("builtin site apple.com should always use direct access") } tw, _ := ParseRequestURI("twitter.com") si = ld.GetVisitCnt(tw) if !si.AsBlocked() || !si.AlwaysBlocked() { t.Error("builtin site twitter.com should use blocked access") } plus, _ := 
ParseRequestURI("plus.google.com") si = ld.GetVisitCnt(plus) if !si.AsBlocked() || !si.AlwaysBlocked() { t.Error("builtin site plus.google.com should use blocked access") } directList := ld.GetDirectList() if len(directList) == 0 { t.Error("builtin site should appear in direct site list") } if !ld.hasBlockedHost["google.com"] { t.Error("google.com should have blocked host") } for _, d := range directList { if d == "google.com" { t.Errorf("direct list contains 2nd level domain which has sub host that's blocked") } } os.Remove(stfile) } func TestSiteStatVisitCnt(t *testing.T) { ss := newSiteStat() g1, _ := ParseRequestURI("www.gtemp.com") g2, _ := ParseRequestURI("calendar.gtemp.com") g3, _ := ParseRequestURI("docs.gtemp.com") sg1 := ss.GetVisitCnt(g1) for i := 0; i < 30; i++ { sg1.DirectVisit() } sg2 := ss.GetVisitCnt(g2) sg2.DirectVisit() sg3 := ss.GetVisitCnt(g3) sg3.DirectVisit() if ss.hasBlockedHost[g1.Domain] { t.Errorf("direct domain %s should not have host at first\n", g1.Domain) } vc := ss.get(g1.Host) if vc == nil { t.Fatalf("no VisitCnt for %s\n", g1.Host) } if vc.Direct != 30 { t.Errorf("direct cnt for %s not correct, should be 30, got: %d\n", g1.Host, vc.Direct) } if vc.Blocked != 0 { t.Errorf("block cnt for %s not correct, should be 0 before blocked visit, got: %d\n", g1.Host, vc.Blocked) } if vc.rUpdated != true { t.Errorf("VisitCnt lvUpdated should be true after visit") } vc.BlockedVisit() if vc.Blocked != 1 { t.Errorf("blocked cnt for %s after 1 blocked visit should be 1, got: %d\n", g1.Host, vc.Blocked) } if vc.Direct != 0 { t.Errorf("direct cnt for %s after 1 blocked visit should be 0, got: %d\n", g1.Host, vc.Direct) } if vc.AsDirect() { t.Errorf("after blocked visit, a site should not be considered as direct\n") } // test blocked visit g4, _ := ParseRequestURI("plus.gtemp.com") si := ss.GetVisitCnt(g4) ss.TempBlocked(g4) // should be blocked for 2 minutes if !si.AsTempBlocked() { t.Error("should be blocked for 2 minutes after blocked visit") } 
si.BlockedVisit() // After temp blocked, update blocked visit count if si.Blocked != 1 { t.Errorf("blocked cnt for %s not correct, should be 1, got: %d\n", g4.Host, vc.Blocked) } vc = ss.get(g4.Host) if vc == nil { t.Fatal("no VisitCnt for ", g4.Host) } if vc.Direct != 0 { t.Errorf("direct cnt for %s not correct, should be 0, got: %d\n", g4.Host, vc.Direct) } if !ss.hasBlockedHost[g4.Domain] { t.Errorf("direct domain %s should have blocked host after blocked visit\n", g4.Domain) } } func TestSiteStatGetVisitCnt(t *testing.T) { ss := newSiteStat() g, _ := ParseRequestURI("gtemp.com") si := ss.GetVisitCnt(g) if !si.AsDirect() { t.Error("never visited site should be considered as direct") } if si.AsBlocked() || si.AsTempBlocked() { t.Error("never visited site should not be considered as blocked/temp blocked") } si.DirectVisit() gw, _ := ParseRequestURI("www.gtemp.com") sig := ss.GetVisitCnt(gw) // gtemp.com is not user specified, www.gtemp.com should get separate visitCnt if sig == si { t.Error("host should get separate visitCnt for not user specified domain") } b, _ := ParseRequestURI("www.btemp.com") ss.Vcnt[b.Host] = newVisitCnt(userCnt, 0) vc := ss.get(b.Host) if !vc.userSpecified() { t.Error("should be user specified") } if !vc.shouldNotSave() { t.Error("user specified should be dropped") } si = ss.GetVisitCnt(b) if !si.AlwaysDirect() { t.Errorf("%s should alwaysDirect\n", b.Host) } if si.AlwaysBlocked() { t.Errorf("%s should not alwaysBlocked\n", b.Host) } if si.OnceBlocked() { t.Errorf("%s should not onceBlocked\n", b.Host) } if !si.AsDirect() { t.Errorf("%s should use direct visit\n", b.Host) } tw, _ := ParseRequestURI("www.tblocked.com") ss.Vcnt[tw.Domain] = newVisitCnt(0, userCnt) si = ss.GetVisitCnt(tw) if !si.AsBlocked() { t.Errorf("%s should use blocked visit\n", tw.Host) } if si.AlwaysDirect() { t.Errorf("%s should not alwaysDirect\n", tw.Host) } if !si.AlwaysBlocked() { t.Errorf("%s should not alwaysBlocked\n", tw.Host) } if !si.OnceBlocked() { 
t.Errorf("%s should onceBlocked\n", tw.Host) } g1, _ := ParseRequestURI("www.shoulddirect.com") for i := 0; i < directDelta; i++ { si.DirectVisit() } si = ss.GetVisitCnt(g1) if !si.AsDirect() { t.Errorf("%s direct %d times, should use direct visit\n", g1.Host, directDelta+1) } if si.OnceBlocked() { t.Errorf("%s has not blocked visit, should not has once blocked\n", g1.Host) } si = ss.GetVisitCnt(g1) si.BlockedVisit() if !si.OnceBlocked() { t.Errorf("%s has one blocked visit, should has once blocked\n", g1.Host) } } ================================================ FILE: ssh.go ================================================ package main import ( "net" "os/exec" "strings" "time" ) func SshRunning(socksServer string) bool { c, err := net.Dial("tcp", socksServer) if err != nil { return false } c.Close() return true } func runOneSSH(server string) { // config parsing canonicalize sshServer config value arr := strings.SplitN(server, ":", 3) sshServer, localPort, sshPort := arr[0], arr[1], arr[2] alreadyRunPrinted := false socksServer := "127.0.0.1:" + localPort for { if SshRunning(socksServer) { if !alreadyRunPrinted { debug.Println("ssh socks server", socksServer, "maybe already running") alreadyRunPrinted = true } time.Sleep(30 * time.Second) continue } // -n redirects stdin from /dev/null // -N do not execute remote command debug.Println("connecting to ssh server", sshServer+":"+sshPort) cmd := exec.Command("ssh", "-n", "-N", "-D", localPort, "-p", sshPort, sshServer) if err := cmd.Run(); err != nil { debug.Println("ssh:", err) } debug.Println("ssh", sshServer+":"+sshPort, "exited, reconnect") time.Sleep(5 * time.Second) alreadyRunPrinted = false } } func runSSH() { for _, server := range config.SshServer { go runOneSSH(server) } } ================================================ FILE: stat.go ================================================ // Proxy statistics. 
package main import ( "sync" "sync/atomic" ) var status struct { cliCnt int32 // number of client connections srvConnCnt map[string]int // number of connections for each host:port srvConnCntMutex sync.Mutex } func initStat() { if !debug { return } status.srvConnCnt = make(map[string]int) } func incCliCnt() int32 { atomic.AddInt32(&status.cliCnt, 1) return status.cliCnt } func decCliCnt() int32 { atomic.AddInt32(&status.cliCnt, -1) return status.cliCnt } func addSrvConnCnt(srv string, delta int) int { status.srvConnCntMutex.Lock() status.srvConnCnt[srv] += delta cnt := status.srvConnCnt[srv] status.srvConnCntMutex.Unlock() return int(cnt) } func incSrvConnCnt(srv string) int { return addSrvConnCnt(srv, 1) } func decSrvConnCnt(srv string) int { return addSrvConnCnt(srv, -1) } ================================================ FILE: testdata/file ================================================ ================================================ FILE: timeoutset.go ================================================ package main import ( "sync" "time" ) type TimeoutSet struct { sync.RWMutex time map[string]time.Time timeout time.Duration } func NewTimeoutSet(timeout time.Duration) *TimeoutSet { ts := &TimeoutSet{time: make(map[string]time.Time), timeout: timeout, } return ts } func (ts *TimeoutSet) add(key string) { now := time.Now() ts.Lock() ts.time[key] = now ts.Unlock() } func (ts *TimeoutSet) has(key string) bool { ts.RLock() t, ok := ts.time[key] ts.RUnlock() if !ok { return false } if time.Now().Sub(t) > ts.timeout { ts.del(key) return false } return true } func (ts *TimeoutSet) del(key string) { ts.Lock() delete(ts.time, key) ts.Unlock() } ================================================ FILE: util.go ================================================ package main import ( "bytes" "crypto/md5" "errors" "fmt" "io" "net" "os" "path" "runtime" "strconv" "strings" "github.com/cyfdecyf/bufio" ) const isWindows = runtime.GOOS == "windows" type notification chan byte func 
newNotification() notification { // Notification channle has size 1, so sending a single one will not block return make(chan byte, 1) } func (n notification) notify() { n <- 1 } func (n notification) hasNotified() bool { select { case <-n: return true default: return false } } func ASCIIToUpperInplace(b []byte) { for i := 0; i < len(b); i++ { if 97 <= b[i] && b[i] <= 122 { b[i] -= 32 } } } func ASCIIToUpper(b []byte) []byte { buf := make([]byte, len(b)) for i := 0; i < len(b); i++ { if 97 <= b[i] && b[i] <= 122 { buf[i] = b[i] - 32 } else { buf[i] = b[i] } } return buf } func ASCIIToLowerInplace(b []byte) { for i := 0; i < len(b); i++ { if 65 <= b[i] && b[i] <= 90 { b[i] += 32 } } } func ASCIIToLower(b []byte) []byte { buf := make([]byte, len(b)) for i := 0; i < len(b); i++ { if 65 <= b[i] && b[i] <= 90 { buf[i] = b[i] + 32 } else { buf[i] = b[i] } } return buf } func IsDigit(b byte) bool { return '0' <= b && b <= '9' } var spaceTbl = [256]bool{ '\t': true, // ht '\n': true, // lf '\r': true, // cr ' ': true, // sp } func IsSpace(b byte) bool { return spaceTbl[b] } func TrimSpace(s []byte) []byte { st := 0 end := len(s) - 1 for ; st < len(s) && IsSpace(s[st]); st++ { } if st == len(s) { return s[:0] } for ; end >= 0 && IsSpace(s[end]); end-- { } return s[st : end+1] } func TrimTrailingSpace(s []byte) []byte { end := len(s) - 1 for ; end >= 0 && IsSpace(s[end]); end-- { } return s[:end+1] } // FieldsN is simliar with bytes.Fields, but only consider space and '\t' as // space, and will include all content in the final slice with ending white // space characters trimmed. bytes.Split can't split on both space and '\t', // and considers two separator as an empty item. bytes.FieldsFunc can't // specify how much fields we need, which is required for parsing response // status line. Returns nil if n < 0. 
func FieldsN(s []byte, n int) [][]byte { if n <= 0 { return nil } res := make([][]byte, n) na := 0 fieldStart := -1 var i int for ; i < len(s); i++ { issep := s[i] == ' ' || s[i] == '\t' if fieldStart < 0 && !issep { fieldStart = i } if fieldStart >= 0 && issep { if na == n-1 { break } res[na] = s[fieldStart:i] na++ fieldStart = -1 } } if fieldStart >= 0 { // must have na <= n-1 here res[na] = TrimSpace(s[fieldStart:]) if len(res[na]) != 0 { // do not consider ending space as a field na++ } } return res[:na] } var digitTbl = [256]int8{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, } // ParseIntFromBytes parse hexidecimal number from given bytes. // No prefix (e.g. 0xdeadbeef) should given. // base can only be 10 or 16. func ParseIntFromBytes(b []byte, base int) (n int64, err error) { // Currently, we have to convert []byte to string to use strconv // Refer to: http://code.google.com/p/go/issues/detail?id=2632 // That's why I created this function. 
if base != 10 && base != 16 { err = errors.New(fmt.Sprintf("invalid base: %d", base)) return } if len(b) == 0 { err = errors.New("parse int from empty bytes") return } neg := false if b[0] == '+' { b = b[1:] } else if b[0] == '-' { b = b[1:] neg = true } for _, d := range b { v := digitTbl[d] if v == -1 { n = 0 err = errors.New(fmt.Sprintf("invalid number: %s", b)) return } if int(v) >= base { n = 0 err = errors.New(fmt.Sprintf("invalid base %d number: %s", base, b)) return } n *= int64(base) n += int64(v) } if neg { n = -n } return } func isFileExists(path string) error { stat, err := os.Stat(path) if err != nil { return err } if !stat.Mode().IsRegular() { return fmt.Errorf("%s is not regular file", path) } return nil } func isDirExists(path string) error { stat, err := os.Stat(path) if err != nil { return err } if !stat.IsDir() { return fmt.Errorf("%s is not directory", path) } return nil } func getUserHomeDir() string { home := os.Getenv("HOME") if home == "" { fmt.Println("HOME environment variable is empty") } return home } func expandTilde(pth string) string { if len(pth) > 0 && pth[0] == '~' { home := getUserHomeDir() return path.Join(home, pth[1:]) } return pth } // copyN copys N bytes from src to dst, reading at most rdSize for each read. // rdSize should <= buffer size of the buffered reader. // Returns any encountered error. 
func copyN(dst io.Writer, src *bufio.Reader, n, rdSize int) (err error) { // Most of the copy is copied from io.Copy for n > 0 { var b []byte var er error if n > rdSize { b, er = src.ReadN(rdSize) } else { b, er = src.ReadN(n) } nr := len(b) n -= nr if nr > 0 { nw, ew := dst.Write(b) if ew != nil { err = ew break } if nr != nw { err = io.ErrShortWrite break } } if er == io.EOF { break } if er != nil { err = er break } } return err } func md5sum(ss ...string) string { h := md5.New() for _, s := range ss { io.WriteString(h, s) } return fmt.Sprintf("%x", h.Sum(nil)) } // hostIsIP determines whether a host address is an IP address and whether // it is private. Currenly only handles IPv4 addresses. func hostIsIP(host string) (isIP, isPrivate bool) { part := strings.Split(host, ".") if len(part) != 4 { return false, false } for _, i := range part { if len(i) == 0 || len(i) > 3 { return false, false } n, err := strconv.Atoi(i) if err != nil || n < 0 || n > 255 { return false, false } } if part[0] == "127" || part[0] == "10" || (part[0] == "192" && part[1] == "168") { return true, true } if part[0] == "172" { n, _ := strconv.Atoi(part[1]) if 16 <= n && n <= 31 { return true, true } } return true, false } // NetNbitIPv4Mask returns a IPMask with highest n bit set. func NewNbitIPv4Mask(n int) net.IPMask { if n > 32 { panic("NewNbitIPv4Mask: bit number > 32") } mask := []byte{0, 0, 0, 0} for id := 0; id < 4; id++ { if n >= 8 { mask[id] = 0xff } else { mask[id] = ^byte(1<<(uint8(8-n)) - 1) break } n -= 8 } return net.IPMask(mask) } var topLevelDomain = map[string]bool{ "ac": true, "co": true, "com": true, "edu": true, "gov": true, "net": true, "org": true, } func trimLastDot(s string) string { if len(s) > 0 && s[len(s)-1] == '.' { return s[:len(s)-1] } return s } // host2Domain returns the domain of a host. It will recognize domains like // google.com.hk. Returns empty string for simple host and internal IP. 
func host2Domain(host string) (domain string) { isIP, isPrivate := hostIsIP(host) if isPrivate { return "" } if isIP { return host } host = trimLastDot(host) lastDot := strings.LastIndex(host, ".") if lastDot == -1 { return "" } // Find the 2nd last dot dot2ndLast := strings.LastIndex(host[:lastDot], ".") if dot2ndLast == -1 { return host } part := host[dot2ndLast+1 : lastDot] // If the 2nd last part of a domain name equals to a top level // domain, search for the 3rd part in the host name. // So domains like bbc.co.uk will not be recorded as co.uk if topLevelDomain[part] { dot3rdLast := strings.LastIndex(host[:dot2ndLast], ".") if dot3rdLast == -1 { return host } return host[dot3rdLast+1:] } return host[dot2ndLast+1:] } // IgnoreUTF8BOM consumes UTF-8 encoded BOM character if present in the file. func IgnoreUTF8BOM(f *os.File) error { bom := make([]byte, 3) n, err := f.Read(bom) if err != nil { return err } if n != 3 { return nil } if bytes.Equal(bom, []byte{0xEF, 0xBB, 0xBF}) { debug.Println("UTF-8 BOM found") return nil } // No BOM found, seek back _, err = f.Seek(-3, 1) return err } // Return all host IP addresses. func hostAddr() (addr []string) { allAddr, err := net.InterfaceAddrs() if err != nil { Fatal("error getting host address", err) } for _, ad := range allAddr { ads := ad.String() id := strings.Index(ads, "/") if id == -1 { // On windows, no network mask. 
id = len(ads) } addr = append(addr, ads[:id]) } return addr } ================================================ FILE: util_test.go ================================================ package main import ( "bytes" "errors" "strings" "testing" "github.com/cyfdecyf/bufio" ) func TestASCIIToUpper(t *testing.T) { testData := []struct { raw []byte upper []byte }{ {[]byte("foobar"), []byte("FOOBAR")}, {[]byte("fOoBAr"), []byte("FOOBAR")}, {[]byte("..fOoBAr\n"), []byte("..FOOBAR\n")}, } for _, td := range testData { up := ASCIIToUpper(td.raw) if !bytes.Equal(up, td.upper) { t.Errorf("raw: %s, upper: %s\n", td.raw, up) } } } func TestASCIIToLower(t *testing.T) { testData := []struct { raw []byte lower []byte }{ {[]byte("FOOBAR"), []byte("foobar")}, {[]byte("fOoBAr"), []byte("foobar")}, {[]byte("..fOoBAr\n"), []byte("..foobar\n")}, } for _, td := range testData { low := ASCIIToLower(td.raw) if !bytes.Equal(low, td.lower) { t.Errorf("raw: %s, lower: %s\n", td.raw, low) } } } func TestIsDigit(t *testing.T) { for i := 0; i < 10; i++ { digit := '0' + byte(i) letter := 'a' + byte(i) if IsDigit(digit) != true { t.Errorf("%c should return true", digit) } if IsDigit(letter) == true { t.Errorf("%c should return false", letter) } } } func TestIsSpace(t *testing.T) { testData := []struct { c byte is bool }{ {'a', false}, {'B', false}, {'z', false}, {'(', false}, {'}', false}, {' ', true}, {'\r', true}, {'\t', true}, {'\n', true}, } for _, td := range testData { if IsSpace(td.c) != td.is { t.Errorf("%v isspace wrong", rune(td.c)) } } } func TestTrimSpace(t *testing.T) { testData := []struct { old string trimed string }{ {"hello", "hello"}, {" hello", "hello"}, {" hello\r\n ", "hello"}, {" hello \t ", "hello"}, {"", ""}, {"\r\n", ""}, } for _, td := range testData { trimed := string(TrimSpace([]byte(td.old))) if trimed != td.trimed { t.Errorf("%s trimmed to %s, wrong", td.old, trimed) } } } func TestTrimTrailingSpace(t *testing.T) { testData := []struct { old string trimed string }{ 
{"hello", "hello"}, {" hello", " hello"}, {" hello\r\n ", " hello"}, {" hello \t ", " hello"}, {"", ""}, {"\r\n", ""}, } for _, td := range testData { trimed := string(TrimTrailingSpace([]byte(td.old))) if trimed != td.trimed { t.Errorf("%s trimmed to %s, should be %s\n", td.old, trimed, td.trimed) } } } func TestFieldsN(t *testing.T) { testData := []struct { raw string n int arr []string }{ {"", 2, nil}, // this should not crash {"hello world", -1, nil}, {"hello \t world welcome", 1, []string{"hello \t world welcome"}}, {" hello \t world welcome ", 1, []string{"hello \t world welcome"}}, {"hello world", 2, []string{"hello", "world"}}, {" hello\tworld ", 2, []string{"hello", "world"}}, // note \r\n in the middle of a string will be considered as a field {" hello world \r\n", 4, []string{"hello", "world"}}, {" hello \t world welcome\r\n", 2, []string{"hello", "world welcome"}}, {" hello \t world welcome \t ", 2, []string{"hello", "world welcome"}}, } for _, td := range testData { arr := FieldsN([]byte(td.raw), td.n) if len(arr) != len(td.arr) { t.Fatalf("%q want %d fields, got %d\n", td.raw, len(td.arr), len(arr)) } for i := 0; i < len(arr); i++ { if string(arr[i]) != td.arr[i] { t.Errorf("%q %d item, want %q, got %q\n", td.raw, i, td.arr[i], arr[i]) } } } } func TestParseIntFromBytes(t *testing.T) { errDummy := errors.New("dummy error") testData := []struct { raw []byte base int err error val int64 }{ {[]byte("123"), 10, nil, 123}, {[]byte("+123"), 10, nil, 123}, {[]byte("-123"), 10, nil, -123}, {[]byte("0"), 10, nil, 0}, {[]byte("a"), 10, errDummy, 0}, {[]byte("aBc"), 16, nil, 0xabc}, {[]byte("+aBc"), 16, nil, 0xabc}, {[]byte("-aBc"), 16, nil, -0xabc}, {[]byte("213e"), 16, nil, 0x213e}, {[]byte("12deadbeef"), 16, nil, 0x12deadbeef}, {[]byte("213n"), 16, errDummy, 0}, } for _, td := range testData { val, err := ParseIntFromBytes(td.raw, td.base) if err != nil && td.err == nil { t.Errorf("%s base %d should NOT return error: %v\n", td.raw, td.base, err) } if err == 
nil && td.err != nil { t.Errorf("%s base %d should return error\n", td.raw, td.base) } if val != td.val { t.Errorf("%s base %d got wrong value: %d\n", td.raw, td.base, val) } } } func TestCopyN(t *testing.T) { testStr := "go is really a nice language" for _, step := range []int{4, 9, 17, 32} { src := bufio.NewReader(strings.NewReader(testStr)) dst := new(bytes.Buffer) err := copyN(dst, src, len(testStr), step) if err != nil { t.Error("unexpected err:", err) break } if dst.String() != testStr { t.Errorf("step %d want %q, got: %q\n", step, testStr, dst.Bytes()) } } } func TestIsFileExists(t *testing.T) { err := isFileExists("testdata") if err == nil { t.Error("should return error is path is directory") } err = isFileExists("testdata/none") if err == nil { t.Error("Not existing file should return error") } err = isFileExists("testdata/file") if err != nil { t.Error("Why error for existing file?") } } func TestNewNbitIPv4Mask(t *testing.T) { mask := []byte(NewNbitIPv4Mask(32)) for i := 0; i < 4; i++ { if mask[i] != 0xff { t.Error("NewNbitIPv4Mask with 32 error") } } mask = []byte(NewNbitIPv4Mask(5)) if mask[0] != 0xf8 || mask[1] != 0 || mask[2] != 0 { t.Error("NewNbitIPv4Mask with 5 error:", mask) } mask = []byte(NewNbitIPv4Mask(9)) if mask[0] != 0xff || mask[1] != 0x80 || mask[2] != 0 { t.Error("NewNbitIPv4Mask with 9 error:", mask) } mask = []byte(NewNbitIPv4Mask(23)) if mask[0] != 0xff || mask[1] != 0xff || mask[2] != 0xfe || mask[3] != 0 { t.Error("NewNbitIPv4Mask with 23 error:", mask) } mask = []byte(NewNbitIPv4Mask(28)) if mask[0] != 0xff || mask[1] != 0xff || mask[2] != 0xff || mask[3] != 0xf0 { t.Error("NewNbitIPv4Mask with 28 error:", mask) } } func TestHost2Domain(t *testing.T) { var testData = []struct { host string domain string }{ {"www.google.com", "google.com"}, {"google.com", "google.com"}, {"com.cn", "com.cn"}, {"sina.com.cn", "sina.com.cn"}, {"www.bbc.co.uk", "bbc.co.uk"}, {"apple.com.cn", "apple.com.cn"}, {"simplehost", ""}, {"192.168.1.1", ""}, 
{"10.2.1.1", ""}, {"123.45.67.89", "123.45.67.89"}, {"172.65.43.21", "172.65.43.21"}, } for _, td := range testData { dm := host2Domain(td.host) if dm != td.domain { t.Errorf("%s got domain %v should be %v", td.host, dm, td.domain) } } } func TestHostIsIP(t *testing.T) { var testData = []struct { host string isIP bool isPri bool }{ {"127.0.0.1", true, true}, {"127.2.1.1", true, true}, {"192.168.1.1", true, true}, {"10.2.3.4", true, true}, {"172.16.5.3", true, true}, {"172.20.5.3", true, true}, {"172.31.5.3", true, true}, {"172.15.1.1", true, false}, {"123.45.67.89", true, false}, {"foo.com", false, false}, {"www.foo.com", false, false}, {"www.bar.foo.com", false, false}, } for _, td := range testData { isIP, isPri := hostIsIP(td.host) if isIP != td.isIP { if td.isIP { t.Error(td.host, "is IP address") } else { t.Error(td.host, "is NOT IP address") } } if isPri != td.isPri { if td.isPri { t.Error(td.host, "is private IP address") } else { t.Error(td.host, "is NOT private IP address") } } } }