[
  {
    "path": ".gitignore",
    "content": "# Created by .ignore support plugin (hsz.mobi)\n### Go template\n# Compiled Object files, Static and Dynamic libs (Shared Objects)\n*.o\n*.a\n*.so\n\n# Folders\n_obj\n_test\n\n# Architecture specific extensions/prefixes\n*.[568vq]\n[568vq].out\n\n*.cgo1.go\n*.cgo2.c\n_cgo_defun.c\n_cgo_gotypes.go\n_cgo_export.*\n\n_testmain.go\n\n*.exe\n*.test\n*.prof\n\nverify.gif\nexamples/config.json"
  },
  {
    "path": "LICENSE",
    "content": "The MIT License (MIT)\n\nCopyright (c) 2016 Yangliang Li\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "zhihu-go：知乎非官方 API 库 with Go\n=================================\n\n[![GoDoc](https://godoc.org/github.com/DeanThompson/zhihu-go?status.svg)](https://godoc.org/github.com/DeanThompson/zhihu-go)\n\n这是一个非官方的 [知乎](https://www.zhihu.com/) API 库，用 Go 实现。\n\n本项目基本上是把 [zhihu-python](https://github.com/egrcc/zhihu-python) 和 [zhihu-py3](https://github.com/7sDream/zhihu-py3) 从 Python 移植到了 Go. 相比之下，比 zhihu-python 的 API 更丰富，比 zhihu-py3 少了活动相关的 API.\n\n**注意：知乎的 API、前端等都可能随时会更新，所以本项目的接口可能会有过时的情况。如果遇到此类问题，欢迎提交 issue 或 pull requests.**\n\n## Table of Contents\n\n* [Table of Contents](#table-of-contents)\n* [Install](#install)\n* [Documentation](#documentation)\n* [Usage](#usage)\n  * [Login：登录](#login)\n  * [User：获取用户信息](#user)\n  * [Question：获取问题信息](#question)\n  * [Answer：获取答案信息](#answer)\n  * [Collection：获取收藏夹信息](#collection)\n  * [Topic：获取话题信息](#topic)\n* [Known Issues](#known-issues)\n* [TODO](#todo)\n* [LICENSE](#license)\n\n## Install\n\n直接使用 `go get`:\n\n```bash\ngo get github.com/DeanThompson/zhihu-go\n```\n\n依赖以下第三方库：\n\n* [goquery](https://github.com/PuerkitoBio/goquery)： 用于解析 HTML，语法操作类似 jQuery\n* [color](https://github.com/fatih/color)：用于输出带颜色的日志\n* [persistent-cookiejar](https://github.com/juju/persistent-cookiejar)：用于维护一个持久化的 cookiejar，实现保持登录\n\n## Documentation\n\n请点击链接前往 GoDoc 查看：[zhihu-go](https://godoc.org/github.com/DeanThompson/zhihu-go)\n\n## Usage\n\n目前已经实现了用户（User），问题（Question），回答（Answer），收藏夹（Collection），话题（Topic）相关的 API，都是信息获取类的，暂无操作类的。\n\nzhihu-go 包名为 `zhihu`，使用前需要先 import:\n\n```go\nimport \"github.com/DeanThompson/zhihu-go\"\n```\n\n### Login\n\n调用 API 之前需要先登录。在 zhihu-go 内部，使用一个全局的 session 来访问所有页面，并自动处理 cookies.\n\n创建一个 JSON 格式的配置文件，提供一个账号和密码，格式如 [config-example.json](examples/config-example.json).\n\n登录（初始化 session）：\n\n```go\nzhihu.Init(\"/path/to/config.json\")\n```\n\n第一次登录会调用图像界面打开验证码文件，需要手动输入验证码到控制台。如果登录成功，后续的请求会沿用此次登录的 cookie, 不需要重复登录。\n\n### User\n\n`zhihu.User` 表示一个知乎用户，可以用于获取一个用户的各种数据。\n\n创建一个 `User` 对象需要传入用户主页的 URL 及其知乎 
ID（用户名），如：\n\n```go\nlink := \"https://www.zhihu.com/people/jixin\"\nuserID := \"黄继新\"\nuser := zhihu.NewUser(link, userID)\n```\n\n获取用户的数据（代码见：[example.go](examples/example.go#L159)）：\n\n```go\nfunc showUser(user *zhihu.User) {\n\tlogger.Info(\"User fields:\")\n\tlogger.Info(\"\tis anonymous: %v\", user.IsAnonymous())  // 是否匿名用户：false\n\tlogger.Info(\"\tuserId: %s\", user.GetUserID())          // 知乎ID：黄继新\n\tlogger.Info(\"\tdataId: %s\", user.GetDataID())          // hash ID：b6f80220378c8b0b78175dd6a0b9c680\n\tlogger.Info(\"\tbio: %s\", user.GetBio())                // BIO：和知乎在一起\n\tlogger.Info(\"\tlocation: %s\", user.GetLocation())      // 位置：北京\n\tlogger.Info(\"\tbusiness: %s\", user.GetBusiness())      // 行业：互联网\n\tlogger.Info(\"\tgender: %s\", user.GetGender())          // 性别：male\n\tlogger.Info(\"\teducation: %s\", user.GetEducation())    // 学校：北京第二外国语学院\n\tlogger.Info(\"\tfollowers num: %d\", user.GetFollowersNum()) // 粉丝数：756632\n\tlogger.Info(\"\tfollowees num: %d\", user.GetFolloweesNum()) // 关注的人数： 9249\n\tlogger.Info(\"\tfollowed columns num: %d\", user.GetFollowedColumnsNum()) // 关注的专栏数：631\n\tlogger.Info(\"\tfollowed topics num: %d\", user.GetFollowedTopicsNum())   // 关注的话题数：131\n\tlogger.Info(\"\tagree num: %d\", user.GetAgreeNum())     // 获得的赞同数：68557\n\tlogger.Info(\"\tthanks num: %d\", user.GetThanksNum())   // 获得的感谢数：17651\n\tlogger.Info(\"\tasks num: %d\", user.GetAsksNum())       // 提问数：1336\n\tlogger.Info(\"\tanswers num: %d\", user.GetAnswersNum()) // 回答数：785\n\tlogger.Info(\"\tposts num: %d\", user.GetPostsNum())     // 专栏文章数：92\n\tlogger.Info(\"\tcollections num: %d\", user.GetCollectionsNum()) // 收藏夹数量：44\n\tlogger.Info(\"\tlogs num: %d\", user.GetLogsNum())   // 公共编辑数：51596\n\t\n\t// <Topic: 知乎指南 - https://www.zhihu.com/topic/19550235>\n\t// <Topic: 苹果公司 (Apple Inc.) 
- https://www.zhihu.com/topic/19551762>\n\t// <Topic: 创新工场 - https://www.zhihu.com/topic/19624098>\n\t// <Topic: iPhone - https://www.zhihu.com/topic/19550292>\n\t// <Topic: 风险投资（VC） - https://www.zhihu.com/topic/19550422>\n\tfor i, topic := range user.GetFollowedTopicsN(5) {\n\t\tlogger.Info(\"\ttop followed topic-%d: %s\", i+1, topic.String())\n\t}\n\n\t// <User: Zz XI - https://www.zhihu.com/people/zz-xi-18>\n\t// <User: xyn - https://www.zhihu.com/people/xyn-31>\n\t// <User: 江湖人称丸子头 - https://www.zhihu.com/people/jiang-hu-ren-cheng-wan-zi-tou>\n\t// <User: 小萍果Y - https://www.zhihu.com/people/xiao-ping-guo-y>\n\t// <User: 最爱麦丽素 - https://www.zhihu.com/people/Mylikes-82>\n\tfor i, follower := range user.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\n\t// <User: 最爱麦丽素 - https://www.zhihu.com/people/Mylikes-82>\n\t// <User: meidong - https://www.zhihu.com/people/zhalimuto>\n\t// <User: 青锐吴斌 - https://www.zhihu.com/people/wu-bin-817>\n\t// <User: Klaith - https://www.zhihu.com/people/Klaith>\n\t// <User: 张野 - https://www.zhihu.com/people/zhang-ye-91-9>\n\tfor i, followee := range user.GetFolloweesN(5) {\n\t\tlogger.Info(\"\ttop followee-%d: %s\", i+1, followee.String())\n\t}\n\n\t// <Question: 偏好投票制（Preferential Voting）的优点和缺点是什么？最适用于哪类场合？ - https://www.zhihu.com/question/40939579>\n\t// <Question: 关于飞机上对使用手机的限制，为什么国内航班比国外航班严？ - https://www.zhihu.com/question/34302422>\n\t// <Question: 银联人民币卡可以在美国支持银联的 ATM 机上直接取美元吗？ - https://www.zhihu.com/question/33874729>\n\t// <Question: 小猫掉进了 5 米深的天井，如何能尽快救出？救助时应注意什么？ - https://www.zhihu.com/question/33307041>\n\t// <Question: 一件商品打一折（90% off）销售，这属于「超高折扣」还是「超低折扣」？ - https://www.zhihu.com/question/31332557>\n\tfor i, ask := range user.GetAsksN(5) {\n\t\tlogger.Info(\"\ttop ask-%d: %s\", i+1, ask.String())\n\t}\n\n\t// <Answer: <User: 黄继新 - https://www.zhihu.com/people/jixin> - https://www.zhihu.com/question/40394171/answer/86692178>\n\t// <Answer: <User: 黄继新 - 
https://www.zhihu.com/people/jixin> - https://www.zhihu.com/question/19952708/answer/84561308>\n\t// <Answer: <User: 黄继新 - https://www.zhihu.com/people/jixin> - https://www.zhihu.com/question/35987345/answer/72981016>\n\t// <Answer: <User: 黄继新 - https://www.zhihu.com/people/jixin> - https://www.zhihu.com/question/24980451/answer/29789141>\n\t// <Answer: <User: 黄继新 - https://www.zhihu.com/people/jixin> - https://www.zhihu.com/question/24816698/answer/29229733>\n\tfor i, answer := range user.GetAnswersN(5) {\n\t\tlogger.Info(\"\ttop answer-%d: %s\", i+1, answer.String())\n\t}\n\n\t// <Collection: 单子 - https://www.zhihu.com/collection/36510307>\n\t// <Collection: 稍后回答 - https://www.zhihu.com/collection/19665350>\n\t// <Collection: 广告！ - https://www.zhihu.com/collection/19688005>\n\t// <Collection: 关于知乎的思考 - https://www.zhihu.com/collection/19573315>\n\t// <Collection: MD，说得太好了！ - https://www.zhihu.com/collection/19886553>\n\tfor i, collection := range user.GetCollectionsN(5) {\n\t\tlogger.Info(\"\ttop collection-%d: %s\", i+1, collection.String())\n\t}\n\n\tfor i, like := range user.GetLikes() {\n\t\tlogger.Info(\"\tlike-%d: %s\", i+1, like.String())\n\t}\n}\n```\n\n### Question\n\n`zhihu.Question` 表示一个知乎问题，用于获取问题相关的数据。初始化需要提供 url 和标题（可为空）:\n\n```go\nlink := \"https://www.zhihu.com/question/28966220\"\ntitle := \"Python 编程，应该养成哪些好的习惯？\"\nquestion := zhihu.NewQuestion(link, title)\n```\n\n获取问题数据：（代码见：[example.go](examples/example.go#L51)）\n\n```go\nfunc showQuestion(question *zhihu.Question) {\n\tlogger.Info(\"Question fields:\")\n\t\n\t// 链接：https://www.zhihu.com/question/28966220\n\tlogger.Info(\"\turl: %s\", question.Link)\n\t\n\t// 标题：Python 编程，应该养成哪些好的习惯？\n\tlogger.Info(\"\ttitle: %s\", question.GetTitle())\n\t\n\t// 描述：我以为编程习惯很重要的，一开始就养成这些习惯，不仅可以提高编程速度，还可以减少 bug 出现的概率。希望各位分享好的编程习惯。\n\tlogger.Info(\"\tdetail: %s\", question.GetDetail())\n\t\n\t\n\tlogger.Info(\"\tanswers num: %d\", question.GetAnswersNum()) // 回答数：15\n\tlogger.Info(\"\tfollowers num: %d\", 
question.GetFollowersNum()) // 关注者数量：1473\n\n\t// <Topic: 程序员 - https://www.zhihu.com/topic/19552330>\n\t// <Topic: Python - https://www.zhihu.com/topic/19552832>\n\t// <Topic: 编程 - https://www.zhihu.com/topic/19554298>\n\t// <Topic: Python 入门 - https://www.zhihu.com/topic/19661050>\n\tfor i, topic := range question.GetTopics() {\n\t\tlogger.Info(\"\ttopic-%d: %s\", i+1, topic.String())\n\t}\n\n\t// <User: 铁头爸爸 - https://www.zhihu.com/people/li-liang-68-9>\n\t// <User: 阳阳 - https://www.zhihu.com/people/yang-yang-3-29-52>\n\t// <User: 田小芳 - https://www.zhihu.com/people/tian-xiao-fang-55>\n\t// <User: 濕濕 - https://www.zhihu.com/people/shi-shi-29-7-18>\n\t// <User: 陈翔宇 - https://www.zhihu.com/people/chen-xiang-yu-91-74>\n\tfor i, follower := range question.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\n\tfor i, follower := range question.GetFollowers() {  // 关注者列表\n\t\tlogger.Info(\"\tfollower-%d: %s\", i+1, follower.String())\n\t\tif i >= 10 {\n\t\t\tlogger.Info(\"\t%d followers not shown.\", question.GetFollowersNum()-i-1)\n\t\t\tbreak\n\t\t}\n\t}\n\n\tallAnswers := question.GetAllAnswers()  // 所有回答\n\tfor i, answer := range allAnswers {\n\t\tlogger.Info(\"\tanswer-%d: %s\", i+1, answer.String())\n\t\tfilename := fmt.Sprintf(\"/tmp/%s-%s的回答.html\", question.GetTitle(), answer.GetAuthor().GetUserID())\n\t\tdumpAnswerHTML(filename, answer)\n\t\tif i >= 10 {\n\t\t\tlogger.Info(\"\t%d answers not shown.\", len(allAnswers)-i-1)\n\t\t\tbreak\n\t\t}\n\t}\n\n\ttopXAnswers := question.GetTopXAnswers(25)  // 前 25 个回答\n\tfor i, answer := range topXAnswers {\n\t\tlogger.Info(\"\ttop-%d answer: %s\", i+1, answer.String())\n\t}\n\n\t// 排名第一的回答\n\t// <Answer: <User: 陈村 - https://www.zhihu.com/people/xjiangxjxjxjx> - https://www.zhihu.com/question/28966220/answer/43346747>\n\tlogger.Info(\"\ttop-1 answer: %s\", question.GetTopAnswer().String())\n\t\n\tlogger.Info(\"\tvisit times: %d\", question.GetVisitTimes()) // 
查看次数：32942\n}\n```\n\n### Answer\n\n`zhihu.Answer` 表示一个知乎答案，初始化时需要指定页面链接，也支持指定对应的问题（`*Question`，可以为 `nil`）和作者（`*User`，可以为 `nil`）：\n\n```go\n// 龙有九个儿子，是跟谁生的？为什么「龙生九子，各不成龙」？豆子 的答案\nanswer := zhihu.NewAnswer(\"https://www.zhihu.com/question/23759686/answer/41997389\", nil, nil)\n```\n\n获取回答数据：（代码见：[example.go](examples/example.go#L95)）\n\n```go\nfunc showAnswer(answer *zhihu.Answer) {\n\tlogger.Info(\"Answer fields:\")\n\t\n\t// 链接：https://www.zhihu.com/question/23759686/answer/41997389\n\tlogger.Info(\"\turl: %s\", answer.Link)\n\n\t// 所属问题\n\t// 链接：https://www.zhihu.com/question/23759686\n\t// 标题：龙有九个儿子，是跟谁生的？为什么「龙生九子，各不成龙」？\n\tquestion := answer.GetQuestion()\n\tlogger.Info(\"\tquestion url: %s\", question.Link)\n\tlogger.Info(\"\tquestion title: %s\", question.GetTitle())\n\n\t// 作者：<User: 豆子 - https://www.zhihu.com/people/douzishushu>\n\tlogger.Info(\"\tauthor: %s\", answer.GetAuthor().String())\n\t\n\tlogger.Info(\"\tupvote num: %d\", answer.GetUpvote())    // 赞同数：26486\n\tlogger.Info(\"\tcomments num: %d\", answer.GetCommentsNum()) // 评论数：20\n\tlogger.Info(\"\tcollected num: %d\", answer.GetCollectedNum())\t// 被收藏次数：22929\n\tlogger.Info(\"\tdata ID: %d\", answer.GetID())   // 数字 ID：12191779\n\n\t// 点赞的用户\n\tvoters := answer.GetVoters()\n\tfor i, voter := range voters {\n\t\tlogger.Info(\"\tvoter-%d: %s\", i+1, voter.String())\n\t\tif i >= 10 {\n\t\t\tremain := len(voters) - i - 1\n\t\t\tlogger.Info(\"\t%d votes not shown.\", remain)\n\t\t\tbreak\n\t\t}\n\t}\n}\n```\n\n### Collection\n\n`zhihu.Collection` 表示一个收藏夹，初始化时必须指定页面 url，支持指定名称（`string` 可以为 `\"\"`）和创建者（`creator *User`，可以为 `nil`）：\n\n```go\n// 黄继新 A4U\ncollection := zhihu.NewCollection(\"https://www.zhihu.com/collection/19677733\", \"\", nil)\n```\n\n获取收藏夹数据：（代码见：[example.go](examples/example.go#L124)）\n\n```go\nfunc showCollection(collection *zhihu.Collection) {\n\tlogger.Info(\"Collection fields:\")\n\t\n\t// 链接：https://www.zhihu.com/collection/19677733\n\tlogger.Info(\"\turl: %s\", 
collection.Link)\n\t\n\t// 名称：A4U\n\tlogger.Info(\"\tname: %s\", collection.GetName())\n\t\n\t// 作者：<User: 黄继新 - https://www.zhihu.com/people/jixin>\n\tlogger.Info(\"\tcreator: %s\", collection.GetCreator().String())\n\tlogger.Info(\"\tfollowers num: %d\", collection.GetFollowersNum())   // 关注者数量：29\n\n\t// 获取 5 个关注者\n\tfor i, follower := range collection.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\t\n\t// 获取 5 个问题\n\tfor i, question := range collection.GetQuestionsN(5) {\n\t\tlogger.Info(\"\ttop question-%d: %s\", i+1, question.String())\n\t}\n\n\t// 获取 5 个回答\n\tfor i, answer := range collection.GetAnswersN(5) {\n\t\tlogger.Info(\"\ttop answer-%d: %s\", i+1, answer.String())\n\t}\n}\n```\n\n### Topic\n\n`zhihu.Topic` 表示一个话题，初始化时必须指定页面 url，支持指定名称（`string` 可以为 `\"\"`）：\n\n```go\n// Python\ntopic := zhihu.NewTopic(\"https://www.zhihu.com/topic/19552832\", \"\")\n```\n\n获取话题数据：（代码见：[example.go](examples/example.go#L237)）\n\n```go\nfunc showTopic(topic *zhihu.Topic) {\n\tlogger.Info(\"Topic fields:\")\n\t\n\t// 链接：https://www.zhihu.com/topic/19552832\n\tlogger.Info(\"\turl: %s\", topic.Link)\n\t\n\t// 名称：Python\n\tlogger.Info(\"\tname: %s\", topic.GetName())\n\t\n\t// 描述：Python 是一种面向对象的解释型计算机程序设计语言，在设计中注重代码的可读性，同时也是一种功能强大的通用型语言。\n\tlogger.Info(\"\tdescription: %s\", topic.GetDescription())\n\t\n\t// 关注者数量：82805\n\tlogger.Info(\"\tfollowers num: %d\", topic.GetFollowersNum())\n\n\t// 最佳答主，一般为 5 个\n\t// <User: RednaxelaFX - https://www.zhihu.com/people/rednaxelafx>\n\t// <User: 松鼠奥利奥 - https://www.zhihu.com/people/tonyseek>\n\t// <User: 涛吴 - https://www.zhihu.com/people/Metaphox>\n\t// <User: 冯昱尧 - https://www.zhihu.com/people/feng-yu-yao>\n\t// <User: Coldwings - https://www.zhihu.com/people/coldwings>\n\tfor i, author := range topic.GetTopAuthors() {\n\t\tlogger.Info(\"\ttop-%d author: %s\", i+1, author.String())\n\t}\n}\n```\n\n## Known Issues\n\n无，欢迎 [提交 
issues](https://github.com/DeanThompson/zhihu-go/issues)\n\n## TODO\n\n按优先级降序排列：\n\n* [X] 获取回答的收藏数\n* [X] 获取收藏夹的答案数量\n* [X] 获取用户的头像\n* [X] 获取用户的微博地址\n* [ ] 把答案导出到 markdown 文件\n* [ ] 更多的登录方式，不需要依赖图形界面打开验证码文件\n* [ ] 增加评论相关的 API\n* [ ] 增加活动相关的 API\n* [ ] 增加专栏相关的 API\n* [ ] test（暂时没想好怎么做）\n\n很可能不会做：\n\n* [ ] 增加用户的操作，如点赞、关注等\n\n欢迎 [提交 pull requests](https://github.com/DeanThompson/zhihu-go/pulls)\n\n## LICENSE\n\n[The MIT license](LICENSE).\n"
  },
  {
    "path": "answer.go",
    "content": "package zhihu\n\nimport (\n\t\"fmt\"\n\t\"net/url\"\n\t\"strconv\"\n\t\"strings\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n)\n\n// Answer 是一个知乎的答案\ntype Answer struct {\n\t*Page\n\n\t// question 是该答案对应的问题\n\tquestion *Question\n\n\t// author 是该答案的作者\n\tauthor *User\n}\n\n// NewAnswer 用于创建一个 Answer 对象，其中 link 是必传的，question, author 可以为 nil\nfunc NewAnswer(link string, question *Question, author *User) *Answer {\n\treturn &Answer{\n\t\tPage:     newZhihuPage(link),\n\t\tquestion: question,\n\t\tauthor:   author,\n\t}\n}\n\n// GetID 返回该答案的数字 ID\nfunc (a *Answer) GetID() int {\n\tif got, ok := a.getIntField(\"data-aid\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := a.Doc()\n\ttext, _ := doc.Find(\"div.zm-item-answer.zm-item-expanded\").Attr(\"data-aid\")\n\taid, _ := strconv.Atoi(text)\n\ta.setField(\"data-aid\", aid)\n\treturn aid\n}\n\n// GetQuestion 返回该回答所属的问题，如果 NewAnswer 时 question 不为 nil，则直接返回该值；\n// 否则会抓取页面并分析得到问题的链接和标题，再新建一个 Question 对象\nfunc (a *Answer) GetQuestion() *Question {\n\tif a.question != nil {\n\t\treturn a.question\n\t}\n\n\tdoc := a.Doc()\n\thref, _ := doc.Find(\"h2.zm-item-title>a\").Attr(\"href\")\n\tlink := makeZhihuLink(href)\n\ttitle := strip(doc.Find(\"h2.zm-item-title\").First().Text())\n\treturn NewQuestion(link, title)\n}\n\n// Author 返回该答案的作者\nfunc (a *Answer) GetAuthor() *User {\n\tif a.author != nil {\n\t\treturn a.author\n\t}\n\n\tdoc := a.Doc()\n\tsel := doc.Find(\"div.zm-item-answer-author-info\").First()\n\treturn newUserFromAnswerAuthorTag(sel)\n}\n\n// GetUpvote 返回赞同数\nfunc (a *Answer) GetUpvote() int {\n\tif got, ok := a.getIntField(\"upvote\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := a.Doc()\n\ttext := strip(doc.Find(\"span.count\").First().Text())\n\tupvote := upvoteTextToNum(text)\n\ta.setField(\"upvote\", upvote)\n\treturn upvote\n}\n\n// ToMarkdown 把回答导出到 markdown 文件\nfunc (a *Answer) ToMarkdown(filename string) error {\n\tif !strings.HasSuffix(filename, \".md\") && !strings.HasSuffix(filename, \".markdown\") 
{\n\t\tfilename += \".md\"\n\t}\n\n\t// TODO convert to markdown\n\tmd := \"\"\n\n\treturn saveString(filename, md)\n}\n\n// ToHtml 把网页源码导出到 html 文件\nfunc (a *Answer) ToHtml(filename string) error {\n\tif !strings.HasSuffix(filename, \".html\") {\n\t\tfilename += \".html\"\n\t}\n\n\thtml, err := a.Doc().Html()\n\tif err != nil {\n\t\treturn err\n\t}\n\treturn saveString(filename, html)\n}\n\n// GetContent 返回回答的内容，HTML 格式\nfunc (a *Answer) GetContent() string {\n\tif got, ok := a.getStringField(\"content\"); ok {\n\t\treturn got\n\t}\n\n\tsel := a.Doc().Find(\"div#zh-question-answer-wrap\").Find(\"div.zm-editable-content\")\n\tcontent, err := answerSelectionToHtml(sel)\n\tif err != nil {\n\t\tlogger.Error(\"导出 HTML 失败：%s\", err.Error())\n\t\treturn \"\"\n\t}\n\ta.setField(\"content\", content)\n\treturn content\n}\n\n// GetVotersN 返回 n 个点赞的用户，如果 n < 0，返回所有点赞的用户\nfunc (a *Answer) GetVotersN(n int) []*User {\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\tquerystring := fmt.Sprintf(`params={\"answer_id\":\"%d\"}`, a.GetID())\n\turl := makeZhihuLink(\"/node/AnswerFullVoteInfoV2\" + \"?\" + querystring)\n\tdoc, err := newDocumentFromURL(url)\n\tif err != nil {\n\t\treturn nil\n\t}\n\n\tsel := doc.Find(\".voters span\")\n\tcapacity := n\n\tif capacity < 0 || capacity > sel.Length() {\n\t\tcapacity = sel.Length()\n\t}\n\tvoters := make([]*User, 0, capacity)\n\n\tsel.EachWithBreak(func(index int, span *goquery.Selection) bool {\n\t\tuserId := strings.Trim(strip(span.Text()), \"、\")\n\t\tvar userLink string\n\t\tif !(userId == \"匿名用户\" || userId == \"知乎用户\") {\n\t\t\tpath, _ := span.Find(\"a\").Attr(\"href\")\n\t\t\tuserLink = makeZhihuLink(path)\n\t\t}\n\t\tvoters = append(voters, NewUser(userLink, userId))\n\t\tif n > 0 && len(voters) == n {\n\t\t\treturn false\n\t\t}\n\t\treturn true\n\t})\n\n\treturn voters\n}\n\n// GetVoters 返回点赞的用户\nfunc (a *Answer) GetVoters() []*User {\n\treturn a.GetVotersN(-1)\n}\n\n// GetCommentsNum 返回评论数量\nfunc (a *Answer) GetCommentsNum() int {\n\tif 
value, ok := a.getIntField(\"comment-num\"); ok {\n\t\treturn value\n\t}\n\n\tdoc := a.Doc()\n\ttext := strip(doc.Find(\"a.meta-item.toggle-comment\").Text())\n\trv := reMatchInt(text)\n\ta.setField(\"comment-num\", rv)\n\treturn rv\n}\n\n// GetCollectedNum 返回被收藏次数\nfunc (a *Answer) GetCollectedNum() int {\n\tif value, ok := a.getIntField(\"collected-num\"); ok {\n\t\treturn value\n\t}\n\n\ttext := strip(a.Doc().Find(`a[data-za-l=\"sidebar_answer_collected_count\"]`).Text())\n\tvalue, _ := strconv.Atoi(text)\n\ta.setField(\"collected-num\", value)\n\treturn value\n}\n\nfunc (a *Answer) String() string {\n\treturn fmt.Sprintf(\"<Answer: %s - %s>\", a.GetAuthor().String(), a.Link)\n}\n\nfunc (a *Answer) setContent(value string) {\n\ta.setField(\"content\", value)\n}\n\nfunc (a *Answer) setUpvote(value int) {\n\ta.setField(\"upvote\", value)\n}\n\nfunc upvoteTextToNum(text string) int {\n\trv := 0\n\tif strings.HasSuffix(text, \"K\") {\n\t\tnum, _ := strconv.Atoi(text[0 : len(text)-1])\n\t\trv = num * 1000\n\t} else if strings.HasPrefix(text, \"W\") {\n\t\tnum, _ := strconv.Atoi(text[0 : len(text)-1])\n\t\trv = num * 10000\n\t} else {\n\t\trv, _ = strconv.Atoi(text)\n\t}\n\treturn rv\n}\n\n// 把一个回答的主体部分导出成 HTML 代码，与原码相比，做了这些操作：\n// \t1. 去掉无用的 noscript 标签\n// \t2. 修复 img 的 src 值\n// \t3. 移除无用的 icon\n// \t4. 
如果是自己的回答，移除末尾的 “修改” 链接\nfunc answerSelectionToHtml(sel *goquery.Selection) (string, error) {\n\tsel.RemoveClass()\n\n\tsel.Find(\"noscript\").Each(func(_ int, tag *goquery.Selection) {\n\t\ttag.Remove() // 把无用的 noscript 去掉\n\t})\n\n\tsel.Find(\"i.icon-external\").Each(func(_ int, tag *goquery.Selection) {\n\t\ttag.Remove() // 把无用的 icon 去掉\n\t})\n\n\tsel.Find(\"a.zu-edit-button\").Remove() // 把 “修改” 链接去掉\n\n\t// 修复 img 的 src\n\tsel.Find(\"img\").Each(func(_ int, tag *goquery.Selection) {\n\t\tvar src string\n\t\tif tag.HasClass(\"origin_image\") {\n\t\t\tsrc, _ = tag.Attr(\"data-original\")\n\t\t} else {\n\t\t\tsrc, _ = tag.Attr(\"data-actualsrc\")\n\t\t}\n\t\ttag.SetAttr(\"src\", src)\n\t\tif tag.Next().Size() == 0 {\n\t\t\ttag.AfterHtml(\"<br>\")\n\t\t}\n\t})\n\n\t// 修复 a 标签的 href，因为知乎的外链都是这种形式：https://link.zhihu.com/?target=xxx\n\tsel.Find(\"a\").Each(func(_ int, tag *goquery.Selection) {\n\t\thref, _ := tag.Attr(\"href\")\n\t\tif strings.Contains(href, \"target=\") {\n\t\t\tlink, err := url.Parse(href)\n\t\t\tif err != nil {\n\t\t\t\treturn\n\t\t\t}\n\t\t\ttarget := link.Query().Get(\"target\")\n\t\t\ttag.SetAttr(\"href\", target)\n\t\t}\n\t})\n\n\twrapper := `<html><head><meta charset=\"utf-8\"></head><body></body></html>`\n\tdoc, _ := goquery.NewDocumentFromReader(strings.NewReader(wrapper))\n\tdoc.Find(\"body\").AppendSelection(sel)\n\n\treturn doc.Html()\n}\n\nfunc newUserFromAnswerAuthorTag(sel *goquery.Selection) *User {\n\tif strip(sel.Text()) == \"匿名用户\" {\n\t\treturn ANONYMOUS\n\t}\n\n\tnode := sel.Find(\"a.author-link\")\n\tuserId := strip(node.Text())\n\turlPath, _ := node.Attr(\"href\")\n\tuserLink := makeZhihuLink(urlPath)\n\treturn NewUser(userLink, userId)\n}\n"
  },
  {
    "path": "collection.go",
    "content": "package zhihu\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"net/url\"\n\t\"strconv\"\n\t\"strings\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n)\n\n// Collection 是一个知乎的收藏夹页面\ntype Collection struct {\n\t*Page\n\n\t// creator 是该收藏夹的创建者\n\tcreator *User\n\n\t// name 是该收藏夹的名称\n\tname string\n}\n\n// NewCollection 创建一个收藏夹对象，返回 *Collection\nfunc NewCollection(link string, name string, creator *User) *Collection {\n\tif !validCollectionURL(link) {\n\t\tpanic(\"收藏夹链接不正确：\" + link)\n\t}\n\n\treturn &Collection{\n\t\tPage:    newZhihuPage(link),\n\t\tcreator: creator,\n\t\tname:    name,\n\t}\n}\n\n// GetName 返回收藏夹的名字\nfunc (c *Collection) GetName() string {\n\tif c.name != \"\" {\n\t\treturn c.name\n\t}\n\n\tdoc := c.Doc()\n\n\t// <h2 class=\"zm-item-title zm-editable-content\" id=\"zh-fav-head-title\">\n\t//   恩恩恩 大力一点，不要停～\n\t// </h2>\n\tc.name = strip(doc.Find(\"h2#zh-fav-head-title\").Text())\n\treturn c.name\n}\n\n// GetCreator 返回收藏夹的创建者\nfunc (c *Collection) GetCreator() *User {\n\tif c.creator != nil {\n\t\treturn c.creator\n\t}\n\n\tdoc := c.Doc()\n\n\t// <h2 class=\"zm-list-content-title\">\n\t//   <a href=\"/people/leonyoung\">李阳良</a>\n\t// </h2>\n\tsel := doc.Find(\"h2.zm-list-content-title a\")\n\tuserId := strip(sel.Text())\n\tlinkPath, _ := sel.Attr(\"href\")\n\tc.creator = NewUser(makeZhihuLink(linkPath), userId)\n\treturn c.creator\n}\n\n// GetFollowersNum 返回收藏夹的关注者数量\nfunc (c *Collection) GetFollowersNum() int {\n\tif got, ok := c.getIntField(\"followers-num\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := c.Doc()\n\n\t// <a href=\"/collection/19653044/followers\" data-za-c=\"collection\" ,=\"\" data-za-a=\"visit_collection_followers\" data-za-l=\"collection_followers_count\">\n\t//   7516\n\t// </a>\n\ttext := strip(doc.Find(`a[data-za-a=\"visit_collection_followers\"]`).Text())\n\tnum, _ := strconv.Atoi(text)\n\tc.setField(\"followers-num\", num)\n\treturn num\n}\n\n// GetFollowersN 返回 n 个关注该收藏夹的用户，如果 n < 0，返回所有关注者\nfunc (c *Collection) 
GetFollowersN(n int) []*User {\n\tvar (\n\t\tlink = urlJoin(c.Link, \"/followers\")\n\t\txsrf = c.GetXSRF()\n\t)\n\tusers, err := ajaxGetFollowers(link, xsrf, n)\n\tif err != nil {\n\t\treturn nil\n\t}\n\treturn users\n}\n\n// GetFollowers 返回关注该收藏夹的用户\nfunc (c *Collection) GetFollowers() []*User {\n\treturn c.GetFollowersN(c.GetFollowersNum())\n}\n\n// GetQuestionsN 返回前 n 个问题，如果 n < 0，返回所有问题\nfunc (c *Collection) GetQuestionsN(n int) []*Question {\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\t// 先获取第一页的问题\n\tquestions := getQuestionsFromDoc(c.Doc())\n\n\ttotalPages := c.totalPages()\n\tif totalPages == 1 {\n\t\tif n < 0 || n > len(questions) {\n\t\t\treturn questions\n\t\t}\n\t\treturn questions[0:n]\n\t}\n\n\t// 再分页查询其他问题\n\tcurrentPage := 2\n\tfor currentPage <= totalPages {\n\t\tlink := fmt.Sprintf(\"%s?page=%d\", c.Link, currentPage)\n\t\tdoc, err := newDocumentFromURL(link)\n\t\tif err != nil {\n\t\t\tlogger.Error(\"解析页面失败：%s, %s\", link, err.Error())\n\t\t\treturn nil\n\t\t}\n\n\t\tnewQuestions := getQuestionsFromDoc(doc)\n\t\tquestions = append(questions, newQuestions...)\n\t\tif n > 0 && len(questions) >= n {\n\t\t\treturn questions[0:n]\n\t\t}\n\t\tcurrentPage++\n\t}\n\n\treturn questions\n}\n\n// GetQuestions 返回收藏夹里所有的问题\nfunc (c *Collection) GetQuestions() []*Question {\n\treturn c.GetQuestionsN(-1)\n}\n\n// GetAnswersN 返回 n 个回答，如果 n < 0，返回所有回答\nfunc (c *Collection) GetAnswersN(n int) []*Answer {\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\t// 先获取第一页的回答\n\tanswers := getAnswersFromDoc(c.Doc())\n\n\ttotalPages := c.totalPages()\n\tif totalPages == 1 {\n\t\tif n < 0 || n > len(answers) {\n\t\t\treturn answers\n\t\t}\n\t\treturn answers[0:n]\n\t}\n\n\t// 在分页查询\n\tcurrentPage := 2\n\tfor currentPage <= totalPages {\n\t\tlink := fmt.Sprintf(\"%s?page=%d\", c.Link, currentPage)\n\t\tdoc, err := newDocumentFromURL(link)\n\t\tif err != nil {\n\t\t\tlogger.Error(\"解析页面失败：%s, %s\", link, err.Error())\n\t\t\treturn nil\n\t\t}\n\n\t\tnewAnswers := 
getAnswersFromDoc(doc)\n\t\tanswers = append(answers, newAnswers...)\n\t\tif n > 0 && len(answers) >= n {\n\t\t\treturn answers[0:n]\n\t\t}\n\t\tcurrentPage++\n\t}\n\treturn answers\n}\n\n// GetAnswers 返回收藏夹里所有的回答\nfunc (c *Collection) GetAnswers() []*Answer {\n\treturn c.GetAnswersN(-1)\n}\n\n// GetQuestionsNum 返回收藏夹的问题数量\nfunc (c *Collection) GetQuestionsNum() int {\n\tif value, ok := c.getIntField(\"question-num\"); ok {\n\t\treturn value\n\t}\n\n\t// 根据分页情况来计算问题数量\n\t// 收藏夹页面，每一页固定 10 个问题，每个问题下可能有多个答案；\n\ttotalPages := c.totalPages()\n\tlastPage := c.Doc()\n\n\tif totalPages > 1 {\n\t\tlp, err := newDocumentFromURL(fmt.Sprintf(\"%s?page=%d\", c.Link, totalPages))\n\t\tif err != nil {\n\t\t\tlogger.Error(\"获取收藏夹最后一页失败：%s\", err.Error())\n\t\t\treturn 0\n\t\t}\n\t\tlastPage = lp\n\t}\n\n\tnumOnLastPage := lastPage.Find(\"#zh-list-answer-wrap h2.zm-item-title\").Size()\n\trv := (totalPages-1)*10 + numOnLastPage\n\tc.setField(\"question-num\", rv)\n\treturn rv\n}\n\n// GetAnswersNum 返回收藏夹的答案数量\n// 获取答案数量有这几种方式：\n// \t1. 在收藏夹页面（/collections/1234567），遍历每一页，累计每页的回答数量。总请求数等于分页数。\n//\t2. 
在收藏夹创建者的个人主页，收藏夹栏目（people/xxyy/collections），有每个收藏夹的简介，\n//     其中就有回答数。遍历每一页（20个/页），找到对应的收藏夹，然后获取回答数。\n//     总请求数不确定，最好情况下 1 次；但考虑到每个用户的收藏夹并不会很多（如达到100个），可以认为最坏情况下需要 5 次。\n// 最终的方案可以综合以上两种方式，以收藏夹页面分页数做依据：\n//  如果页数大于 3（经验值），则采用方法 2；否则用方法 1\n// 希望能通过这样的方式来减少请求数，获得更好的性能。\nfunc (c *Collection) GetAnswersNum() int {\n\tif value, ok := c.getIntField(\"answer-num\"); ok {\n\t\treturn value\n\t}\n\n\trv := 0\n\ttotalPages := c.totalPages()\n\tif totalPages > 3 {\n\t\t// 从个人主页上获取\n\t\tpage := 1\n\t\tlinkFmt := urlJoin(c.GetCreator().Link, \"/collections?page=%d\")\n\t\tcollectionHref := strings.Split(c.Link, \"zhihu.com\")[1]\n\t\tselector := fmt.Sprintf(`a.zm-profile-fav-item-title[href=\"%s\"]`, collectionHref)\n\t\tfor {\n\t\t\tcreatorCollectionLink := fmt.Sprintf(linkFmt, page)\n\t\t\tdoc, err := newDocumentFromURL(creatorCollectionLink)\n\t\t\tif err != nil {\n\t\t\t\tlogger.Error(\"获取用户的收藏夹主页失败：%s\", err.Error())\n\t\t\t\treturn 0\n\t\t\t}\n\t\t\ttitleTag := doc.Find(selector).First()\n\t\t\tif titleTag.Size() == 1 {\n\t\t\t\trv = reMatchInt(titleTag.Parent().Next().Contents().Eq(0).Text())\n\t\t\t\tbreak\n\t\t\t} else {\n\t\t\t\t// 本页没找到，下一页\n\t\t\t\tif doc.Find(\"div.border-pager\").Size() == 0 {\n\t\t\t\t\treturn 0\n\t\t\t\t} else {\n\t\t\t\t\tpages := getTotalPages(doc)\n\t\t\t\t\tif page == pages {\n\t\t\t\t\t\treturn 0\n\t\t\t\t\t}\n\t\t\t\t\tpage++\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t} else {\n\t\tselector := \"#zh-list-answer-wrap div.zm-item-fav\"\n\t\trv = c.Doc().Find(selector).Size()\n\t\tcurrentPage := 2\n\t\tfor currentPage <= totalPages {\n\t\t\tlink := fmt.Sprintf(\"%s?page=%d\", c.Link, currentPage)\n\t\t\tdoc, err := newDocumentFromURL(link)\n\t\t\tif err != nil {\n\t\t\t\tlogger.Error(\"解析页面失败：%s, %s\", link, err.Error())\n\t\t\t\treturn 0\n\t\t\t}\n\t\t\trv += doc.Find(selector).Size()\n\t\t\tcurrentPage++\n\t\t}\n\t}\n\tc.setField(\"answer-num\", rv)\n\treturn rv\n}\n\n// GetCommentsNum 返回评论数量\nfunc (c *Collection) GetCommentsNum() int {\n\tif value, 
ok := c.getIntField(\"comment-num\"); ok {\n\t\treturn value\n\t}\n\n\tdoc := c.Doc()\n\ttext := strip(doc.Find(\"div#zh-list-meta-wrap  a.toggle-comment\").Text())\n\trv := reMatchInt(text)\n\tc.setField(\"comment-num\", rv)\n\treturn rv\n}\n\nfunc (c *Collection) String() string {\n\treturn fmt.Sprintf(\"<Collection: %s - %s>\", c.GetName(), c.Link)\n}\n\nfunc ajaxGetFollowers(link string, xsrf string, total int) ([]*User, error) {\n\tif total == 0 {\n\t\treturn nil, nil\n\t}\n\n\tvar (\n\t\toffset     = 0\n\t\tgotDataNum = pageSize\n\t\tinitCap    = total\n\t)\n\n\tif initCap < 0 {\n\t\tinitCap = pageSize\n\t}\n\tusers := make([]*User, 0, initCap)\n\n\tform := url.Values{}\n\tform.Set(\"_xsrf\", xsrf)\n\n\tfor gotDataNum == pageSize {\n\t\tform.Set(\"offset\", strconv.Itoa(offset))\n\t\tdoc, dataNum, err := newDocByNormalAjax(link, form)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tdoc.Find(\"div.zm-profile-card\").Each(func(index int, sel *goquery.Selection) {\n\t\t\tthisUser := newUserFromSelector(sel)\n\t\t\tusers = append(users, thisUser)\n\t\t})\n\n\t\tif total > 0 && len(users) >= total {\n\t\t\treturn users[:total], nil\n\t\t}\n\n\t\tgotDataNum = dataNum\n\t\toffset += gotDataNum\n\t}\n\treturn users, nil\n}\n\nfunc newDocByNormalAjax(link string, form url.Values) (*goquery.Document, int, error) {\n\tgotDataNum := 0\n\tbody := strings.NewReader(form.Encode())\n\tresp, err := gSession.Ajax(link, body, link)\n\tif err != nil {\n\t\tlogger.Error(\"查询关注的话题失败, 链接：%s, 参数：%s，错误：%s\", link, form.Encode(), err.Error())\n\t\treturn nil, gotDataNum, err\n\t}\n\n\tdefer resp.Body.Close()\n\tresult := normalAjaxResult{}\n\terr = json.NewDecoder(resp.Body).Decode(&result)\n\tif err != nil {\n\t\tlogger.Error(\"解析返回值 json 失败：%s\", err.Error())\n\t\treturn nil, gotDataNum, err\n\t}\n\n\ttopicsHtml := result.Msg[1].(string)\n\tdoc, err := goquery.NewDocumentFromReader(strings.NewReader(topicsHtml))\n\tif err != nil {\n\t\tlogger.Error(\"解析返回的 HTML 失败：%s\", 
err.Error())\n\t\treturn nil, gotDataNum, err\n\t}\n\tgotDataNum = int(result.Msg[0].(float64))\n\treturn doc, gotDataNum, err\n}\n\nfunc getQuestionsFromDoc(doc *goquery.Document) []*Question {\n\tquestions := make([]*Question, 0, pageSize)\n\titems := doc.Find(\"div#zh-list-answer-wrap\").Find(\"h2.zm-item-title\")\n\titems.Each(func(index int, sel *goquery.Selection) {\n\t\ta := sel.Find(\"a\")\n\t\tqTitle := strip(a.Text())\n\t\tqHref, _ := a.Attr(\"href\")\n\t\tthisQuestion := NewQuestion(makeZhihuLink(qHref), qTitle)\n\t\tquestions = append(questions, thisQuestion)\n\t})\n\treturn questions\n}\n\nfunc getAnswersFromDoc(doc *goquery.Document) []*Answer {\n\tvar answers []*Answer\n\tvar lastQuestion *Question\n\n\tdoc.Find(\"div.zm-item\").Each(func(index int, sel *goquery.Selection) {\n\t\t// 回答\n\t\tcontentTag := sel.Find(\"div.zm-item-rich-text\")\n\t\tif contentTag.Size() == 0 {\n\t\t\t// 回答被建议修改\n\t\t\treason := strip(sel.Find(\"div.answer-status\").Text())\n\t\t\tlogger.Warn(\"忽略一个问题，原因：%s\", reason)\n\t\t\treturn\n\t\t}\n\n\t\t// 获取问题，如果同一个问题下收藏了多个回答，则除了第一个外，后面的回答的 HTML 部分，\n\t\t// 也就是 div.zm-item 里面不会有该问题的链接（a 标签），所以用 lastQuestion 标记\n\t\t// 最近的一个问题\n\t\tvar thisQuestion *Question\n\t\tif qTag := sel.Find(\"h2.zm-item-title\").Find(\"a\"); qTag.Size() > 0 {\n\t\t\tqTitle := strip(qTag.Text())\n\t\t\tqHref, _ := qTag.Attr(\"href\")\n\t\t\tthisQuestion = NewQuestion(makeZhihuLink(qHref), qTitle)\n\t\t\tlastQuestion = thisQuestion\n\t\t} else {\n\t\t\tthisQuestion = lastQuestion\n\t\t}\n\n\t\t// 答主\n\t\tauthor := newUserFromAnswerAuthorTag(sel.Find(\"div.zm-item-answer-author-info\"))\n\n\t\tanswerHref, _ := contentTag.Attr(\"data-entry-url\")\n\t\tvoteText, _ := sel.Find(\"a.zm-item-vote-count\").Attr(\"data-votecount\")\n\t\tvote, _ := strconv.Atoi(voteText)\n\t\tthisAnswer := NewAnswer(makeZhihuLink(answerHref), thisQuestion, author)\n\t\tthisAnswer.setUpvote(vote)\n\n\t\tanswers = append(answers, thisAnswer)\n\t})\n\treturn answers\n}\n"
  },
  {
    "path": "examples/config-example.json",
    "content": "{\n  \"account\": \"email-or-phonenum\",\n  \"password\": \"your-password-here\"\n}"
  },
  {
    "path": "examples/example.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"io/ioutil\"\n\n\t\"github.com/DeanThompson/zhihu-go\"\n)\n\nvar (\n\tlogger = zhihu.Logger{true}\n)\n\nfunc main() {\n\tzhihu.Init(\"./config.json\")\n\n\t// 黄继新，和知乎在一起\n\tuser := zhihu.NewUser(\"https://www.zhihu.com/people/jixin\", \"\")\n\tshowUser(user)\n\n\tlogger.Success(\"========== split ==========\")\n\n\t// Python 编程，应该养成哪些好的习惯？\n\tquestionUrl := \"https://www.zhihu.com/question/28966220\"\n\tquestion := zhihu.NewQuestion(questionUrl, \"\")\n\tshowQuestion(question)\n\n\tlogger.Success(\"========== split ==========\")\n\n\t// 龙有九个儿子，是跟谁生的？为什么「龙生九子，各不成龙」？豆子 的答案\n\tanswer := zhihu.NewAnswer(\"https://www.zhihu.com/question/23759686/answer/41997389\", nil, nil)\n\tshowAnswer(answer)\n\n\tlogger.Success(\"========== split ==========\")\n\n\t// 程序员为了期权加入创业公司，值得吗？ 匿名用户的答案\n\tanswer2 := zhihu.NewAnswer(\"https://www.zhihu.com/question/28023819/answer/49723406\", nil, nil)\n\tshowAnswer(answer2)\n\n\tlogger.Success(\"========== split ==========\")\n\n\t// 黄继新 A4U\n\tcollection := zhihu.NewCollection(\"https://www.zhihu.com/collection/19677733\", \"\", nil)\n\tshowCollection(collection)\n\n\t// Python\n\ttopic := zhihu.NewTopic(\"https://www.zhihu.com/topic/19552832\", \"\")\n\tshowTopic(topic)\n}\n\nfunc showQuestion(question *zhihu.Question) {\n\tlogger.Info(\"Question fields:\")\n\tlogger.Info(\"\turl: %s\", question.Link)\n\tlogger.Info(\"\ttitle: %s\", question.GetTitle())\n\tlogger.Info(\"\tdetail: %s\", question.GetDetail())\n\tlogger.Info(\"\tanswers num: %d\", question.GetAnswersNum())\n\tlogger.Info(\"\tfollowers num: %d\", question.GetFollowersNum())\n\tlogger.Info(\"\tcomments num: %d\", question.GetCommentsNum())\n\n\tfor i, topic := range question.GetTopics() {\n\t\tlogger.Info(\"\ttopic-%d: %s\", i+1, topic.String())\n\t}\n\n\tfor i, follower := range question.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\n\tfor i, follower := range 
question.GetFollowers() {\n\t\tlogger.Info(\"\tfollower-%d: %s\", i+1, follower.String())\n\t\tif i >= 10 {\n\t\t\tlogger.Info(\"\t%d followers not shown.\", question.GetFollowersNum()-i-1)\n\t\t\tbreak\n\t\t}\n\t}\n\n\tallAnswers := question.GetAllAnswers()\n\tfor i, answer := range allAnswers {\n\t\tlogger.Info(\"\tanswer-%d: %s\", i+1, answer.String())\n\t\tfilename := fmt.Sprintf(\"/tmp/%s-%s的回答.html\", question.GetTitle(), answer.GetAuthor().GetUserID())\n\t\tdumpAnswerHTML(filename, answer)\n\t\tif i >= 10 {\n\t\t\tlogger.Info(\"\t%d answers not shown.\", len(allAnswers)-i-1)\n\t\t\tbreak\n\t\t}\n\t}\n\n\ttopXAnswers := question.GetTopXAnswers(25)\n\tfor i, answer := range topXAnswers {\n\t\tlogger.Info(\"\ttop-%d answer: %s\", i+1, answer.String())\n\t}\n\n\tlogger.Info(\"\ttop-1 answer: %s\", question.GetTopAnswer().String())\n\tlogger.Info(\"\tvisit times: %d\", question.GetVisitTimes())\n}\n\nfunc showAnswer(answer *zhihu.Answer) {\n\tlogger.Info(\"Answer fields:\")\n\tlogger.Info(\"\turl: %s\", answer.Link)\n\n\tquestion := answer.GetQuestion()\n\tlogger.Info(\"\tquestion url: %s\", question.Link)\n\tlogger.Info(\"\tquestion title: %s\", question.GetTitle())\n\n\tlogger.Info(\"\tauthor: %s\", answer.GetAuthor().String())\n\tlogger.Info(\"\tupvote num: %d\", answer.GetUpvote())\n\tlogger.Info(\"\tcomments num: %d\", answer.GetCommentsNum())\n\tlogger.Info(\"\tcollected num: %d\", answer.GetCollectedNum())\n\tlogger.Info(\"\tdata ID: %d\", answer.GetID())\n\n\t// dump content\n\tfilename := fmt.Sprintf(\"/tmp/answer_%d.html\", answer.GetID())\n\tdumpAnswerHTML(filename, answer)\n\n\tvoters := answer.GetVoters()\n\tfor i, voter := range voters {\n\t\tlogger.Info(\"\tvoter-%d: %s\", i+1, voter.String())\n\t\tif i >= 10 {\n\t\t\tremain := len(voters) - i - 1\n\t\t\tlogger.Info(\"\t%d votes not shown.\", remain)\n\t\t\tbreak\n\t\t}\n\t}\n}\n\nfunc showCollection(collection *zhihu.Collection) {\n\tlogger.Info(\"Collection fields:\")\n\tlogger.Info(\"\turl: 
%s\", collection.Link)\n\tlogger.Info(\"\tname: %s\", collection.GetName())\n\tlogger.Info(\"\tcreator: %s\", collection.GetCreator().String())\n\tlogger.Info(\"\tfollowers num: %d\", collection.GetFollowersNum())\n\tlogger.Info(\"\tcomments num: %d\", collection.GetCommentsNum())\n\tlogger.Info(\"\tquestions num: %d\", collection.GetQuestionsNum())\n\tlogger.Info(\"\tanswers num: %d\", collection.GetAnswersNum())\n\n\tfor i, follower := range collection.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\n\tfor i, follower := range collection.GetFollowers() {\n\t\tlogger.Info(\"\tfollower-%d: %s\", i+1, follower.String())\n\t}\n\n\tfor i, question := range collection.GetQuestionsN(5) {\n\t\tlogger.Info(\"\ttop question-%d: %s\", i+1, question.String())\n\t}\n\n\tfor i, question := range collection.GetQuestions() {\n\t\tlogger.Info(\"\tquestion-%d: %s\", i+1, question.String())\n\t}\n\n\tfor i, answer := range collection.GetAnswersN(5) {\n\t\tlogger.Info(\"\ttop answer-%d: %s\", i+1, answer.String())\n\t}\n\n\tfor i, answer := range collection.GetAnswers() {\n\t\tlogger.Info(\"\tanswer-%d: %s\", i+1, answer.String())\n\t}\n}\n\nfunc showUser(user *zhihu.User) {\n\tlogger.Info(\"User fields:\")\n\tlogger.Info(\"\tis anonymous: %v\", user.IsAnonymous())\n\tlogger.Info(\"\tuserId: %s\", user.GetUserID())\n\tlogger.Info(\"\tdataId: %s\", user.GetDataID())\n\tlogger.Info(\"\tavatar: %s\", user.GetAvatar())\n\tlogger.Info(\"\tavatar with size hd: %s\", user.GetAvatarWithSize(\"hd\"))\n\tlogger.Info(\"\tbio: %s\", user.GetBio())\n\tlogger.Info(\"\tlocation: %s\", user.GetLocation())\n\tlogger.Info(\"\tbusiness: %s\", user.GetBusiness())\n\tlogger.Info(\"\teducation: %s\", user.GetEducation())\n\tlogger.Info(\"\tgender: %s\", user.GetGender())\n\tlogger.Info(\"\tweibo: %s\", user.GetWeiboURL())\n\tlogger.Info(\"\tfollowers num: %d\", user.GetFollowersNum())\n\tlogger.Info(\"\tfollowees num: %d\", 
user.GetFolloweesNum())\n\tlogger.Info(\"\tfollowed columns num: %d\", user.GetFollowedColumnsNum())\n\tlogger.Info(\"\tfollowed topics num: %d\", user.GetFollowedTopicsNum())\n\tlogger.Info(\"\tagree num: %d\", user.GetAgreeNum())\n\tlogger.Info(\"\tthanks num: %d\", user.GetThanksNum())\n\tlogger.Info(\"\tasks num: %d\", user.GetAsksNum())\n\tlogger.Info(\"\tanswers num: %d\", user.GetAnswersNum())\n\tlogger.Info(\"\tposts num: %d\", user.GetPostsNum())\n\tlogger.Info(\"\tcollections num: %d\", user.GetCollectionsNum())\n\tlogger.Info(\"\tlogs num: %d\", user.GetLogsNum())\n\n\tfor i, topic := range user.GetFollowedTopicsN(5) {\n\t\tlogger.Info(\"\ttop followed topic-%d: %s\", i+1, topic.String())\n\t}\n\n\t//\tfor i, topic := range user.GetFollowedTopics() {\n\t//\t\tlogger.Info(\"\tfollowed topic-%d: %s\", i+1, topic.String())\n\t//\t}\n\n\tfor i, follower := range user.GetFollowersN(5) {\n\t\tlogger.Info(\"\ttop follower-%d: %s\", i+1, follower.String())\n\t}\n\n\t//\tfor i, follower := range user.GetFollowers() {\n\t//\t\tlogger.Info(\"\tfollower-%d: %s\", i+1, follower.String())\n\t//\t}\n\n\tfor i, followee := range user.GetFolloweesN(5) {\n\t\tlogger.Info(\"\ttop followee-%d: %s\", i+1, followee.String())\n\t}\n\n\t//\tfor i, followee := range user.GetFollowees() {\n\t//\t\tlogger.Info(\"\tfollowee-%d: %s\", i+1, followee.String())\n\t//\t}\n\n\tfor i, ask := range user.GetAsksN(5) {\n\t\tlogger.Info(\"\ttop ask-%d: %s\", i+1, ask.String())\n\t}\n\n\t//\tfor i, ask := range user.GetAsks() {\n\t//\t\tlogger.Info(\"\task-%d: %s\", i+1, ask.String())\n\t//\t}\n\n\tfor i, answer := range user.GetAnswersN(5) {\n\t\tlogger.Info(\"\ttop answer-%d: %s\", i+1, answer.String())\n\t}\n\n\t//\tfor i, answer := range user.GetAnswers() {\n\t//\t\tlogger.Info(\"\tanswer-%d: %s\", i+1, answer.String())\n\t//\t}\n\n\tfor i, collection := range user.GetCollectionsN(5) {\n\t\tlogger.Info(\"\ttop collection-%d: %s\", i+1, collection.String())\n\t}\n\n\t//\tfor i, collection 
:= range user.GetCollections() {\n\t//\t\tlogger.Info(\"\tcollection-%d: %s\", i+1, collection.String())\n\t//\t}\n\n\tfor i, like := range user.GetLikes() {\n\t\tlogger.Info(\"\tlike-%d: %s\", i+1, like.String())\n\t}\n}\n\nfunc showTopic(topic *zhihu.Topic) {\n\tlogger.Info(\"Topic fields:\")\n\tlogger.Info(\"\turl: %s\", topic.Link)\n\tlogger.Info(\"\tname: %s\", topic.GetName())\n\tlogger.Info(\"\tdescription: %s\", topic.GetDescription())\n\tlogger.Info(\"\tfollowers num: %d\", topic.GetFollowersNum())\n\n\tfor i, author := range topic.GetTopAuthors() {\n\t\tlogger.Info(\"\ttop-%d author: %s\", i+1, author.String())\n\t}\n}\n\nfunc dumpAnswerHTML(filename string, answer *zhihu.Answer) error {\n\terr := ioutil.WriteFile(filename, []byte(answer.GetContent()), 0666)\n\tif err == nil {\n\t\tlogger.Info(\"\tcontent dumped to %s\", filename)\n\t}\n\treturn err\n}\n"
  },
  {
    "path": "log.go",
    "content": "package zhihu\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/fatih/color\"\n)\n\n// Logger 是一个简单的输出工具，可以输出不同颜色的信息\n// TODO simple level\ntype Logger struct {\n\tEnabled bool\n}\n\nfunc (logger *Logger) log(a ...interface{}) {\n\tif logger.Enabled {\n\t\tfmt.Println(a...)\n\t}\n}\n\n// Error 输出 error 级别的日志\nfunc (logger *Logger) Error(msg string, a ...interface{}) {\n\tlogger.log(color.RedString(\"ERROR: \"+msg, a...))\n}\n\n// Warn 输出 warning 级别的日志\nfunc (logger *Logger) Warn(msg string, a ...interface{}) {\n\tlogger.log(color.YellowString(\"WARN: \"+msg, a...))\n}\n\n// Warning 是 Warn 的别名\nfunc (logger *Logger) Warning(msg string, a ...interface{}) {\n\tlogger.Warn(msg, a...)\n}\n\n// Info 输出 info 级别的日志\nfunc (logger *Logger) Info(msg string, a ...interface{}) {\n\tlogger.log(color.BlueString(\"INFO: \"+msg, a...))\n}\n\n// Debug 输出 debug 级别的日志\nfunc (logger *Logger) Debug(msg string, a ...interface{}) {\n\tlogger.log(color.WhiteString(\"DEBUG: \"+msg, a...))\n}\n\n// Success 输出 success 的日志，基本上与 info 一样，除了使用了绿色\nfunc (logger *Logger) Success(msg string, a ...interface{}) {\n\tlogger.log(color.GreenString(\"SUCCESS: \"+msg, a...))\n}\n"
  },
  {
    "path": "log_test.go",
    "content": "package zhihu\n\nimport (\n\t\"testing\"\n)\n\nfunc Test_Error(t *testing.T) {\n\tvar logger = Logger{Enabled: true}\n\tlogger.Error(\"测试：输出一条 ERROR 的信息\")\n\tlogger.Error(\"测试：从 1 到 5 分别是：%d, %d, %d, %d, %d\", 1, 2, 3, 4, 5)\n}\n\nfunc Test_Info(t *testing.T) {\n\tvar logger = Logger{Enabled: true}\n\tlogger.Info(\"测试：输出一条 INFO 的信息\")\n\tlogger.Info(\"测试：从 1 到 5 分别是：%d, %d, %d, %d, %d\", 1, 2, 3, 4, 5)\n}\n"
  },
  {
    "path": "question.go",
    "content": "package zhihu\n\nimport (\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"net/url\"\n\t\"strconv\"\n\t\"strings\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n)\n\n// Question 表示一个知乎问题，可以用于获取其标题、详情、答案等信息\ntype Question struct {\n\t*Page\n\n\t// title 是该问题的标题\n\ttitle string\n}\n\n// NewQuestion 通过给定的 URL 创建一个 Question 对象\nfunc NewQuestion(link string, title string) *Question {\n\tif !validQuestionURL(link) {\n\t\tpanic(\"问题链接不正确: \" + link)\n\t}\n\n\treturn &Question{\n\t\tPage:  newZhihuPage(link),\n\t\ttitle: title,\n\t}\n}\n\n// GetTitle 获取问题标题\nfunc (q *Question) GetTitle() string {\n\tif q.title != \"\" {\n\t\treturn q.title\n\t}\n\n\tdoc := q.Doc()\n\tq.title = strip(doc.Find(\"h2.zm-item-title\").First().Text())\n\treturn q.title\n}\n\n// GetDetail 获取问题描述\nfunc (q *Question) GetDetail() string {\n\tif got, ok := q.getStringField(\"detail\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := q.Doc()\n\tdetail := strip(doc.Find(\"div#zh-question-detail\").First().Text())\n\tq.setField(\"detail\", detail)\n\treturn detail\n}\n\n// GetAnswersNum 获取问题回答数量\nfunc (q *Question) GetAnswersNum() int {\n\tif got, ok := q.getIntField(\"answers-num\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := q.Doc()\n\tdata, exists := doc.Find(\"h3#zh-question-answer-num\").Attr(\"data-num\")\n\tanswerNum := 0\n\tif exists {\n\t\tanswerNum, _ = strconv.Atoi(data)\n\t}\n\tq.setField(\"answers-num\", answerNum)\n\treturn answerNum\n}\n\n// GetFollowersNum 获取问题关注数量\nfunc (q *Question) GetFollowersNum() int {\n\tif got, ok := q.getIntField(\"followers-num\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := q.Doc()\n\ttext := doc.Find(\"div.zg-gray-normal>a>strong\").Text()\n\tfollowersNum, _ := strconv.Atoi(text)\n\tq.setField(\"followers-num\", followersNum)\n\treturn followersNum\n}\n\n// GetTopics 获取问题的话题列表\nfunc (q *Question) GetTopics() []*Topic {\n\tvar topics []*Topic\n\tq.Doc().Find(\"a.zm-item-tag\").Each(func(index int, sel *goquery.Selection) {\n\t\tname := strip(sel.Text())\n\t\thref, _ 
:= sel.Attr(\"href\")\n\t\tthisTopic := NewTopic(makeZhihuLink(href), name)\n\t\ttopics = append(topics, thisTopic)\n\t})\n\treturn topics\n}\n\n// GetFollowersN 返回 n 个关注者，如果 n < 0，返回所有关注者\nfunc (q *Question) GetFollowersN(n int) []*User {\n\tvar (\n\t\tlink = urlJoin(q.Link, \"/followers\")\n\t\txsrf = q.GetXSRF()\n\t)\n\tusers, err := ajaxGetFollowers(link, xsrf, n)\n\tif err != nil {\n\t\treturn nil\n\t}\n\treturn users\n}\n\n// GetFollowers 获取关注该问题的用户\nfunc (q *Question) GetFollowers() []*User {\n\treturn q.GetFollowersN(q.GetFollowersNum())\n}\n\n// GetAllAnswers 获取问题的所有答案\nfunc (q *Question) GetAllAnswers() []*Answer {\n\treturn q.GetTopXAnswers(q.GetAnswersNum())\n}\n\n// GetTopXAnswers 获取问题 Top X 的答案\nfunc (q *Question) GetTopXAnswers(x int) []*Answer {\n\tif x < 0 || x > q.GetAnswersNum() {\n\t\tx = q.GetAnswersNum()\n\t}\n\n\t// 1. 首页的回答\n\tanswers := q.getAnswersOnIndex()\n\n\tif x < len(answers) {\n\t\treturn answers[:x]\n\t}\n\n\t// 2. \"更多\"，调用 Ajax 接口\n\tmoreCount := x - pageSize\n\tif moreCount > 0 {\n\t\tanswers = append(answers, q.getMoreAnswers(moreCount)...)\n\t}\n\n\treturn answers\n}\n\n// GetTopAnswer 获取问题排名第一的答案\nfunc (q *Question) GetTopAnswer() *Answer {\n\ttopAnswers := q.GetTopXAnswers(1)\n\tif len(topAnswers) >= 1 {\n\t\treturn topAnswers[0]\n\t}\n\treturn nil\n}\n\n// GetCommentsNum 返回问题的评论数量\nfunc (q *Question) GetCommentsNum() int {\n\tif value, ok := q.getIntField(\"comment-num\"); ok {\n\t\treturn value\n\t}\n\n\tdoc := q.Doc()\n\ttext := doc.Find(\"div.zm-meta-panel a.toggle-comment\").Text()\n\trv := reMatchInt(strip(text))\n\tq.setField(\"comment-num\", rv)\n\treturn rv\n}\n\n// GetVisitTimes 获取问题的访问次数\nfunc (q *Question) GetVisitTimes() int {\n\tif got, ok := q.getIntField(\"visit-times\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := q.Doc()\n\tcontent, exists := doc.Find(`meta[itemprop=\"visitsCount\"]`).Attr(\"content\")\n\tvisitTimes := 0\n\tif exists {\n\t\tvisitTimes, _ = strconv.Atoi(content)\n\t}\n\tq.setField(\"visit-times\", 
visitTimes)\n\treturn visitTimes\n}\n\nfunc (q *Question) String() string {\n\treturn fmt.Sprintf(\"<Question: %s - %s>\", q.GetTitle(), q.Link)\n}\n\n// getAnswersOnIndex 解析问题页面，返回第一页的回答\nfunc (q *Question) getAnswersOnIndex() []*Answer {\n\ttotalNum := q.GetAnswersNum()\n\tanswers := make([]*Answer, 0, minInt(pageSize, totalNum))\n\n\tdoc := q.Doc()\n\n\tdoc.Find(\"div.zm-item-answer\").Each(func(index int, sel *goquery.Selection) {\n\t\tanswers = append(answers, q.processSingleAnswer(sel))\n\t})\n\treturn answers\n}\n\n// getAnswersByAjax 处理 “更多” 回答，调用 Ajax 接口\nfunc (q *Question) getAnswersByAjax(page int) ([]*Answer, error) {\n\toffset := page * pageSize\n\tif offset > q.GetAnswersNum() {\n\t\treturn nil, errors.New(\"No more answers.\")\n\t}\n\n\t// 如果 URL 是 https://www.zhihu.com/question/23759686，则 urlToken 是 23759686\n\turlToken, _ := strconv.Atoi(q.Link[len(q.Link)-8 : len(q.Link)])\n\n\tform := url.Values{}\n\tform.Set(\"_xsrf\", q.GetXSRF())\n\tform.Set(\"method\", \"next\")\n\tform.Set(\"params\", fmt.Sprintf(`{\"url_token\":%d,\"pagesize\":%d,\"offset\":%d}`, urlToken, pageSize, offset))\n\n\tlink := makeZhihuLink(\"/node/QuestionAnswerListV2\")\n\tbody := strings.NewReader(form.Encode())\n\tresp, err := gSession.Ajax(link, body, q.Link)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\tdefer resp.Body.Close()\n\tresult := nodeListResult{}\n\terr = json.NewDecoder(resp.Body).Decode(&result)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\tanswers := make([]*Answer, 0, len(result.Msg))\n\tfor _, answerHtml := range result.Msg {\n\t\tdoc, err := goquery.NewDocumentFromReader(strings.NewReader(answerHtml))\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t\tthisAnswer := q.processSingleAnswer(doc.Selection)\n\t\tanswers = append(answers, thisAnswer)\n\t}\n\n\treturn answers, nil\n}\n\n// getMoreAnswers 执行多次“更多”\nfunc (q *Question) getMoreAnswers(limit int) []*Answer {\n\tanswers := make([]*Answer, 0, limit)\n\tindex := 0\n\ttotalPage := (limit + 
pageSize - 1) / pageSize\n\tfor index < totalPage {\n\t\tpage := index + 1\n\t\tmoreAnswers, err := q.getAnswersByAjax(page)\n\t\tif err != nil {\n\t\t\tlogger.Error(\"加载第 %d 页回答失败，问题：%s，错误：%s\", page, q.Link, err.Error())\n\t\t} else {\n\t\t\tanswers = append(answers, moreAnswers...)\n\t\t}\n\t\tindex++\n\t}\n\treturn answers\n}\n\n// processSingleAnswer 处理一个回答的 HTML 片段，\n// 这段 HTML 可能来自问题页面，也可能来自 Ajax 接口\nfunc (q *Question) processSingleAnswer(sel *goquery.Selection) *Answer {\n\t// 1. 获取链接\n\tanswerHref, _ := sel.Find(\"a.answer-date-link\").Attr(\"href\")\n\tanswerLink := makeZhihuLink(answerHref)\n\n\t// 2. 获取作者\n\tauthorSel := sel.Find(\"div.zm-item-answer-author-info\")\n\tvar author *User\n\tif authorSel.Find(\"a.author-link\").Size() == 0 {\n\t\t// 匿名用户\n\t\tauthor = ANONYMOUS\n\t} else {\n\t\t// 具名用户\n\t\tx := authorSel.Find(\"a.author-link\")\n\t\tuserID := strip(x.Text())\n\t\tuserHref, _ := x.Attr(\"href\")\n\t\tauthor = NewUser(makeZhihuLink(userHref), userID)\n\t}\n\n\tanswer := NewAnswer(answerLink, q, author)\n\n\t// 3. 获取赞同数\n\tdataIsOwner, _ := sel.Attr(\"data-isowner\")\n\tisOwner := dataIsOwner == \"1\" // 判断是否本人的回答\n\tvar voteText string\n\tif isOwner {\n\t\tvoteText = strip(sel.Find(\"a.zm-item-vote-count\").Text())\n\t} else {\n\t\tvoteText = strip(sel.Find(\"div.zm-votebar\").Find(\"span.count\").Text())\n\t}\n\tanswer.setUpvote(upvoteTextToNum(voteText))\n\n\t// 4. 获取内容\n\tcontent, _ := answerSelectionToHtml(sel.Find(\"div.zm-editable-content\"))\n\tanswer.setContent(content)\n\n\treturn answer\n}\n\nfunc (q *Question) setFollowersNum(value int) {\n\tq.setField(\"followers-num\", value)\n}\n\nfunc (q *Question) setAnswersNum(value int) {\n\tq.setField(\"answers-num\", value)\n}\n\nfunc (q *Question) setVisitTimes(value int) {\n\tq.setField(\"visit-times\", value)\n}\n"
  },
  {
    "path": "question_test.go",
    "content": "package zhihu\n\nimport \"testing\"\n\nfunc init_session() {\n\tInit(\"./examples/config.json\")\n}\n\nfunc Test_GetTitle(t *testing.T) {\n\tinit_session()\n\n\tquestion := NewQuestion(\"https://www.zhihu.com/question/41171543\", \"\")\n\tgot := question.GetTitle()\n\twant := \"如何评价第一局比赛 AlphaGo 战胜李世石？\"\n\tlogger.Info(\"got title: %s\", got)\n\tlogger.Info(\"expected title: %s\", want)\n\tif got != want {\n\t\tt.Error(\"GetTitle() returns error result\")\n\t}\n}\n\nfunc Test_GetDetail(t *testing.T) {\n\tinit_session()\n\n\tquestion := NewQuestion(\"https://www.zhihu.com/question/41171543\", \"\")\n\tgot := question.GetDetail()\n\twant := \"本题已收录至知乎圆桌 » 对弈人工智能，更多关于李世石对战人工智能的解读欢迎关注讨论。\"\n\tlogger.Info(\"got detail: %s\", got)\n\tlogger.Info(\"expected detail: %s\", want)\n\tif got != want {\n\t\tt.Error(\"GetDetail() returns error result\")\n\t}\n}\n"
  },
  {
    "path": "session.go",
    "content": "package zhihu\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"io\"\n\t\"io/ioutil\"\n\t\"net/http\"\n\t\"net/url\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"regexp\"\n\t\"strconv\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/juju/persistent-cookiejar\"\n)\n\n// Auth 是用于登录的信息，保存了用户名和密码\ntype Auth struct {\n\tAccount  string `json:\"account\"`\n\tPassword string `json:\"password\"`\n\n\tloginType string // phone_num 或 email\n\tloginURL  string // 通过 Account 判断\n}\n\n// isEmail 判断是否通过邮箱登录\nfunc (auth *Auth) isEmail() bool {\n\treturn isEmail(auth.Account)\n}\n\n// isPhone 判断是否通过手机号登录\nfunc (auth *Auth) isPhone() bool {\n\treturn regexp.MustCompile(`^1[0-9]{10}$`).MatchString(auth.Account)\n}\n\nfunc (auth *Auth) toForm() url.Values {\n\tif auth.isEmail() {\n\t\tauth.loginType = \"email\"\n\t\tauth.loginURL = makeZhihuLink(\"/login/email\")\n\t} else if auth.isPhone() {\n\t\tauth.loginType = \"phone_num\"\n\t\tauth.loginURL = makeZhihuLink(\"/login/phone_num\")\n\t} else {\n\t\tpanic(\"无法判断登录类型: \" + auth.Account)\n\t}\n\tvalues := url.Values{}\n\tlogger.Info(\"登录类型：%s, 登录地址：%s\", auth.loginType, auth.loginURL)\n\tvalues.Set(auth.loginType, auth.Account)\n\tvalues.Set(\"password\", auth.Password)\n\tvalues.Set(\"remember_me\", \"true\") // important!\n\treturn values\n}\n\n// Session 保持和知乎服务器的会话，用于向服务器发起请求获取 HTML 或 JSON 数据\ntype Session struct {\n\tauth   *Auth\n\tclient *http.Client\n}\n\ntype loginResult struct {\n\tR         int         `json:\"r\"`\n\tMsg       string      `json:\"msg\"`\n\tErrorCode int         `json:\"errcode\"`\n\tData      interface{} `json:\"data\"`\n}\n\n// NewSession 创建并返回一个 *Session 对象，\n// 这里没有初始化登录账号信息，账号信息用 `LoadConfig` 通过配置文件进行设置\nfunc NewSession() *Session {\n\ts := new(Session)\n\tcookieJar, _ := cookiejar.New(nil)\n\ts.client = &http.Client{\n\t\tJar: cookieJar,\n\t}\n\treturn s\n}\n\n// LoadConfig 从配置文件中读取账号信息\n// 配置文件 是 JSON 格式：\n// {\n//   \"account\": \"xyz@example.com\",\n//   \"password\": \"p@ssw0rd\"\n// }\nfunc (s 
*Session) LoadConfig(cfg string) {\n\tfd, err := os.Open(cfg)\n\tif err != nil {\n\t\tpanic(\"无法打开配置文件 config.json: \" + err.Error())\n\t}\n\tdefer fd.Close()\n\n\tauth := new(Auth)\n\terr = json.NewDecoder(fd).Decode(&auth)\n\tif err != nil {\n\t\tpanic(\"解析配置文件出错: \" + err.Error())\n\t}\n\n\ts.auth = auth\n\t// TODO 如果设置了与上一次不一样的账号，最好把 cookies 重置\n}\n\n// Login 登录并保存 cookies\nfunc (s *Session) Login() error {\n\tif s.authenticated() {\n\t\tlogger.Success(\"已经是登录状态，不需要重复登录\")\n\t\treturn nil\n\t}\n\n\tform := s.buildLoginForm().Encode()\n\tbody := strings.NewReader(form)\n\treq, err := http.NewRequest(\"POST\", s.auth.loginURL, body)\n\tif err != nil {\n\t\tlogger.Error(\"构造登录请求失败：%s\", err.Error())\n\t\treturn err\n\t}\n\n\theaders := newHTTPHeaders(true)\n\theaders.Set(\"Content-Length\", strconv.Itoa(len(form)))\n\theaders.Set(\"Content-Type\", \"application/x-www-form-urlencoded\")\n\theaders.Set(\"Referer\", baseZhihuURL)\n\treq.Header = headers\n\n\tlogger.Info(\"登录中，用户名：%s\", s.auth.Account)\n\n\tresp, err := s.client.Do(req)\n\tif err != nil {\n\t\tlogger.Error(\"登录失败：%s\", err.Error())\n\t\treturn err\n\t}\n\n\tif strings.ToLower(resp.Header.Get(\"Content-Type\")) != \"application/json\" {\n\t\tlogger.Error(\"服务器没有返回 json 数据\")\n\t\treturn fmt.Errorf(\"未知的 Content-Type: %s\", resp.Header.Get(\"Content-Type\"))\n\t}\n\n\tdefer resp.Body.Close()\n\tresult := loginResult{}\n\tcontent, err := ioutil.ReadAll(resp.Body)\n\tif err != nil {\n\t\tlogger.Error(\"读取响应内容失败：%s\", err.Error())\n\t}\n\n\tlogger.Info(\"登录响应内容：%s\", strings.Replace(string(content), \"\\n\", \"\", -1))\n\n\terr = json.Unmarshal(content, &result)\n\tif err != nil {\n\t\tlogger.Error(\"JSON 解析失败：%s\", err.Error())\n\t\treturn err\n\t}\n\n\tif result.R == 0 {\n\t\tlogger.Success(\"登录成功！\")\n\t\ts.client.Jar.(*cookiejar.Jar).Save()\n\t\treturn nil\n\t}\n\tif result.R == 1 {\n\t\tlogger.Warn(\"登录失败！原因：%s\", result.Msg)\n\t\treturn fmt.Errorf(\"登录失败！原因：%s\", 
result.Msg)\n\t}\n\n\tlogger.Error(\"登录出现未知错误：%s\", string(content))\n\treturn fmt.Errorf(\"登录失败，未知错误：%s\", string(content))\n}\n\n// Get 发起一个 GET 请求，自动处理 cookies\nfunc (s *Session) Get(url string) (*http.Response, error) {\n\tlogger.Info(\"GET %s\", url)\n\treq, err := http.NewRequest(\"GET\", url, nil)\n\tif err != nil {\n\t\tlogger.Error(\"NewRequest failed with URL: %s\", url)\n\t\treturn nil, err\n\t}\n\n\treq.Header = newHTTPHeaders(false)\n\treturn s.client.Do(req)\n}\n\n// Post 发起一个 POST 请求，自动处理 cookies\nfunc (s *Session) Post(url string, bodyType string, body io.Reader) (*http.Response, error) {\n\tlogger.Info(\"POST %s, %s\", url, bodyType)\n\treq, err := http.NewRequest(\"POST\", url, body)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\theaders := newHTTPHeaders(false)\n\theaders.Set(\"Content-Type\", bodyType)\n\treq.Header = headers\n\treturn s.client.Do(req)\n}\n\n// Ajax 发起一个 Ajax 请求，自动处理 cookies\nfunc (s *Session) Ajax(url string, body io.Reader, referer string) (*http.Response, error) {\n\tlogger.Info(\"AJAX %s, referrer %s\", url, referer)\n\treq, err := http.NewRequest(\"POST\", url, body)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\theaders := newHTTPHeaders(true)\n\theaders.Set(\"Content-Type\", \"application/x-www-form-urlencoded\")\n\theaders.Set(\"Referer\", referer)\n\treq.Header = headers\n\treturn s.client.Do(req)\n}\n\n// authenticated 检查是否已经登录（cookies 没有失效）\nfunc (s *Session) authenticated() bool {\n\toriginURL := makeZhihuLink(\"/settings/profile\")\n\tresp, err := s.Get(originURL)\n\tif err != nil {\n\t\tlogger.Error(\"访问 profile 页面出错: %s\", err.Error())\n\t\treturn false\n\t}\n\n\t// 如果没有登录，会跳转到 http://www.zhihu.com/?next=%2Fsettings%2Fprofile\n\tlastURL := resp.Request.URL.String()\n\tlogger.Info(\"获取 profile 的请求，跳转到了：%s\", lastURL)\n\treturn lastURL == originURL\n}\n\nfunc (s *Session) buildLoginForm() url.Values {\n\tvalues := s.auth.toForm()\n\tvalues.Set(\"_xsrf\", s.searchXSRF())\n\tvalues.Set(\"captcha\", 
s.downloadCaptcha())\n\treturn values\n}\n\n// 从 cookies 获取 _xsrf 用于 POST 请求\nfunc (s *Session) searchXSRF() string {\n\tresp, err := s.Get(baseZhihuURL)\n\tif err != nil {\n\t\tpanic(\"获取 _xsrf 失败：\" + err.Error())\n\t}\n\n\t// retrieve from cookies\n\tfor _, cookie := range resp.Cookies() {\n\t\tif cookie.Name == \"_xsrf\" {\n\t\t\treturn cookie.Value\n\t\t}\n\t}\n\n\treturn \"\"\n}\n\n// downloadCaptcha 获取验证码，用于登录\nfunc (s *Session) downloadCaptcha() string {\n\turl := makeZhihuLink(fmt.Sprintf(\"/captcha.gif?r=%d&type=login\", 1000*time.Now().Unix()))\n\tlogger.Info(\"获取验证码：%s\", url)\n\tresp, err := s.Get(url)\n\tif err != nil {\n\t\tpanic(\"获取验证码失败：\" + err.Error())\n\t}\n\tif resp.StatusCode != http.StatusOK {\n\t\tpanic(fmt.Sprintf(\"获取验证码失败，StatusCode = %d\", resp.StatusCode))\n\t}\n\n\tdefer resp.Body.Close()\n\n\tfileExt := strings.Split(resp.Header.Get(\"Content-Type\"), \"/\")[1]\n\tverifyImg := filepath.Join(getCwd(), \"verify.\"+fileExt)\n\tfd, err := os.OpenFile(verifyImg, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)\n\tif err != nil {\n\t\tpanic(\"打开验证码文件失败：\" + err.Error())\n\t}\n\tdefer fd.Close()\n\n\tio.Copy(fd, resp.Body)        // 保存验证码文件\n\topenCaptchaFile(verifyImg)    // 调用外部程序打开\n\tcaptcha := readCaptchaInput() // 读取用户输入\n\n\treturn captcha\n}\n\nvar (\n\tgSession = NewSession() // 全局的 Session，调用 Init() 初始化\n)\n\n// Init 用于传入配置文件，配置全局的 Session\nfunc Init(cfgFile string) {\n\t// 配置账号信息\n\tgSession.LoadConfig(cfgFile)\n\n\t// 登录\n\tgSession.Login()\n}\n\n// SetSession 用于替换默认的 session\nfunc SetSession(s *Session) {\n\tgSession = s\n}\n"
  },
  {
    "path": "session_test.go",
    "content": "package zhihu\n\nimport (\n\t\"testing\"\n)\n\nconst cfgFile = \"./examples/config.json\"\n\nfunc Test_searchXsrf(t *testing.T) {\n\ts := NewSession()\n\tlogger.Debug(\"_xsrf: %s\", s.searchXSRF())\n}\n\n//func Test_downloadCaptcha(t *testing.T) {\n//\ts := NewSession(\"./example/config.json\")\n//\ts.downloadCaptcha()\n//}\n\n//func Test_buildLoginForm(t *testing.T) {\n//\ts := &Session{}\n//\ts.LoadConfig()\n//\tvalues := s.buildLoginForm()\n//\tfmt.Println(values.Encode())\n//}\n"
  },
  {
    "path": "topic.go",
    "content": "package zhihu\n\nimport (\n\t\"fmt\"\n\t\"strconv\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n)\n\ntype Topic struct {\n\t*Page\n\n\t// name 是该话题的名称\n\tname string\n}\n\nfunc NewTopic(link string, name string) *Topic {\n\tif !validTopicURL(link) {\n\t\tpanic(\"非法的 Topic 链接：%s\" + link)\n\t}\n\treturn &Topic{\n\t\tPage: newZhihuPage(link),\n\t\tname: name,\n\t}\n}\n\n// GetName 返回话题名称\nfunc (t *Topic) GetName() string {\n\tif t.name != \"\" {\n\t\treturn t.name\n\t}\n\n\t// <h1 class=\"zm-editable-content\" data-disabled=\"1\">Python</h1>\n\tt.name = strip(t.Doc().Find(\"h1.zm-editable-content\").Text())\n\treturn t.name\n}\n\n// GetDescription 返回话题的描述\nfunc (t *Topic) GetDescription() string {\n\tif got, ok := t.getStringField(\"description\"); ok {\n\t\treturn got\n\t}\n\n\t// <div class=\"zm-editable-content\" data-editable-maxlength=\"130\">\n\t//   Python 是一种面向对象的解释型计算机程序设计语言，在设计中注重代码的可读性，同时也是一种功能强大的通用型语言。\n\t//   <a href=\"javascript:;\" class=\"zu-edit-button\" name=\"edit\">\n\t//     <i class=\"zu-edit-button-icon\"></i>修改\n\t//   </a>\n\t// </div>\n\tdescription := strip(t.Doc().Find(\"div.zm-editable-content\").Text())\n\tt.setField(\"description\", description)\n\treturn description\n}\n\n// GetFollowersNum 返回关注者数量\nfunc (t *Topic) GetFollowersNum() int {\n\tif got, ok := t.getIntField(\"followers-num\"); ok {\n\t\treturn got\n\t}\n\n\t// <div class=\"zm-topic-side-followers-info\">\n\t// \t <a href=\"/topic/19552832/followers\">\n\t//     <strong>82155</strong>\n\t//   </a> 人关注了该话题\n\t// </div>\n\ttext := strip(t.Doc().Find(\"div.zm-topic-side-followers-info strong\").Text())\n\tnum, _ := strconv.Atoi(text)\n\tt.setField(\"followers-num\", num)\n\treturn num\n}\n\n// GetTopAuthors 返回最佳回答者，一般来说是 5 个\nfunc (t *Topic) GetTopAuthors() []*User {\n\tauthors := make([]*User, 0, 5)\n\tdiv := t.Doc().Find(\"div#zh-topic-top-answerer\")\n\tdiv.Find(\"div.zm-topic-side-person-item-content\").Each(func(index int, sel *goquery.Selection) {\n\t\ttag := 
sel.Find(\"a\").First()\n\t\tuHref, _ := tag.Attr(\"href\")\n\t\tuId := strip(tag.Text())\n\n\t\tthisAuthor := NewUser(makeZhihuLink(uHref), uId)\n\n\t\tbio, _ := sel.Find(\"div.zm-topic-side-bio\").Attr(\"title\")\n\t\tthisAuthor.setBio(bio)\n\n\t\tauthors = append(authors, thisAuthor)\n\t})\n\treturn authors\n}\n\nfunc (t *Topic) String() string {\n\treturn fmt.Sprintf(\"<Topic: %s - %s>\", t.GetName(), t.Link)\n}\n"
  },
  {
    "path": "user.go",
    "content": "package zhihu\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"net/url\"\n\t\"strconv\"\n\t\"strings\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n)\n\nvar (\n\tANONYMOUS = NewUser(\"\", \"匿名用户\")\n)\n\n// User 表示一个知乎用户\ntype User struct {\n\t*Page\n\n\t// userId 表示用户的知乎 ID（用户名）\n\tuserID string\n}\n\n// NewUser 创建一个用户对象。\n// link 为空的时候表示匿名用户，此时 userId 仅允许 \"匿名用户\" 或 \"知乎用户\"；\n// userId 可以为空，这种情况下调用 GetUserID 会去解析用户主页\nfunc NewUser(link string, userID string) *User {\n\tif link == \"\" && !isAnonymous(userID) {\n\t\tpanic(\"调用 NewUser 的参数不合法\")\n\t}\n\n\treturn &User{\n\t\tPage:   newZhihuPage(link),\n\t\tuserID: userID,\n\t}\n}\n\n// GetUserID 返回用户的知乎 ID\nfunc (user *User) GetUserID() string {\n\tif user.userID != \"\" {\n\t\treturn user.userID\n\t}\n\n\tdoc := user.Doc()\n\n\t// <div class=\"title-section ellipsis\">\n\t//   <span class=\"name\">黄继新</span>，\n\t//   <span class=\"bio\" title=\"和知乎在一起\">和知乎在一起</span>\n\t// </div>\n\tuser.userID = strip(doc.Find(\"div.title-section.ellipsis\").Find(\"span.name\").Text())\n\treturn user.userID\n}\n\n// GetDataID 返回用户的 data-id\nfunc (user *User) GetDataID() string {\n\tif user.IsAnonymous() {\n\t\treturn \"\"\n\t}\n\n\tif got, ok := user.getStringField(\"data-id\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := user.Doc()\n\n\t// 分两种情况：自己和其他用户\n\t// 1. 其他用户\n\t// <div class=\"zm-profile-header-op-btns clearfix\">\n\t//   <button data-follow=\"m:button\" data-id=\"e22dba11081f3d71afc10b9c8c641672\" class=\"zg-btn zg-btn-unfollow zm-rich-follow-btn\">取消关注</button>\n\t// </div>\n\t//\n\t// 2. 自己\n\t// <input type=\"hidden\" name=\"dest_id\" value=\"2f5c3f612108780e7d5400d8f74ab449\">\n\tvar dataID string\n\tbtns := doc.Find(\"div.zm-profile-header-op-btns\")\n\tif btns.Size() > 0 {\n\t\t// 1. 其他用户\n\t\tdataID, _ = btns.Find(\"button\").Attr(\"data-id\")\n\t} else {\n\t\t// 2. 
自己\n\t\tscript := doc.Find(`script[data-name=\"ga_vars\"]`).Text()\n\t\tdata := make(map[string]interface{})\n\t\tjson.Unmarshal([]byte(script), &data)\n\t\tdataID = data[\"user_hash\"].(string)\n\t}\n\tuser.setField(\"data-id\", dataID)\n\treturn dataID\n}\n\n// GetBio 返回用户的 BIO\nfunc (user *User) GetBio() string {\n\tif user.IsAnonymous() {\n\t\treturn \"\"\n\t}\n\n\tif got, ok := user.getStringField(\"bio\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := user.Doc()\n\n\t// <span class=\"bio\" title=\"程序员，用 Python 和 Go 做服务端开发。\">程序员，用 Python 和 Go 做服务端开发。</span>\n\tbio := strip(doc.Find(\"span.bio\").Eq(0).Text())\n\tuser.setField(\"bio\", bio)\n\treturn bio\n}\n\n// GetLocation 返回用户所在地\nfunc (user *User) GetLocation() string {\n\treturn user.getProfile(\"location\")\n}\n\n// GetBusiness 返回用户的所在行业\nfunc (user *User) GetBusiness() string {\n\treturn user.getProfile(\"business\")\n}\n\n// GetEducation 返回用户的教育信息\nfunc (user *User) GetEducation() string {\n\treturn user.getProfile(\"education\")\n}\n\n// GetGender 返回用户的性别（male/female/unknown）\nfunc (user *User) GetGender() string {\n\tgender := \"unknown\"\n\tif user.IsAnonymous() {\n\t\treturn gender\n\t}\n\n\tif got, ok := user.getStringField(\"gender\"); ok {\n\t\treturn got\n\t}\n\n\tdoc := user.Doc()\n\n\t// <span class=\"item gender\"><i class=\"icon icon-profile-male\"></i></span>\n\tsel := doc.Find(\"span.gender\").Find(\"i\")\n\tif sel.HasClass(\"icon-profile-male\") {\n\t\tgender = \"male\"\n\t} else {\n\t\tgender = \"female\"\n\t}\n\tuser.setField(\"gender\", gender)\n\treturn gender\n}\n\n// GetAvatar 返回用户的头像 URL，默认的尺寸\nfunc (user *User) GetAvatar() string {\n\tif user.IsAnonymous() {\n\t\treturn \"\"\n\t}\n\n\tif got, ok := user.getStringField(\"avatar\"); ok {\n\t\treturn got\n\t}\n\n\timg := user.Doc().Find(\"div.body\").Find(\"img.Avatar\").First()\n\tavatar, _ := img.Attr(\"src\")\n\tuser.setField(\"avatar\", avatar)\n\treturn avatar\n}\n\n// GetAvatarWithSize 返回指定尺寸的头像 URL，size 支持的值：s, xs, m, l, xl, hd, 
\"\"\nfunc (user *User) GetAvatarWithSize(size string) string {\n\tdefaultAvatar := user.GetAvatar()\n\tif defaultAvatar == \"\" {\n\t\treturn defaultAvatar\n\t}\n\n\tif !validateAvatarSize(size) {\n\t\treturn defaultAvatar\n\t}\n\n\treturn replaceAvatarSize(defaultAvatar, size)\n}\n\n// GetWeiboURL 返回用户的微博主页 URL\nfunc (user *User) GetWeiboURL() string {\n\tif user.IsAnonymous() {\n\t\treturn \"\"\n\t}\n\n\tif got, ok := user.getStringField(\"weibo-url\"); ok {\n\t\treturn got\n\t}\n\n\tvalue := \"\"\n\ttag := user.Doc().Find(\"a.zm-profile-header-user-weibo\")\n\tif tag.Size() > 0 {\n\t\tvalue, _ = tag.First().Attr(\"href\")\n\t}\n\tuser.setField(\"weibo-url\", value)\n\treturn value\n}\n\n// GetFollowersNum 返回用户的粉丝数量\nfunc (user *User) GetFollowersNum() int {\n\treturn user.getFollowersNumOrFolloweesNum(\"followers-num\")\n}\n\n// GetFolloweesNum 返回用户关注的数量\nfunc (user *User) GetFolloweesNum() int {\n\treturn user.getFollowersNumOrFolloweesNum(\"followees-num\")\n}\n\n// GetFollowedColumnsNum 返回用户关注的专栏数量\nfunc (user *User) GetFollowedColumnsNum() int {\n\treturn user.getFollowedColumnsOrTopicsNum(\"followed-columns-num\")\n}\n\n// GetFollowedTopicsNum 返回用户关注的话题数量\nfunc (user *User) GetFollowedTopicsNum() int {\n\treturn user.getFollowedColumnsOrTopicsNum(\"followed-topics-num\")\n}\n\n// GetAgreeNum 返回用户的点赞数\nfunc (user *User) GetAgreeNum() int {\n\treturn user.getAgreeOrThanksNum(\"agree-num\")\n}\n\n// GetThanksNum 返回用户的感谢数\nfunc (user *User) GetThanksNum() int {\n\treturn user.getAgreeOrThanksNum(\"thanks-num\")\n}\n\n// GetAsksNum 返回用户的提问数\nfunc (user *User) GetAsksNum() int {\n\treturn user.getProfileNum(\"asks-num\")\n}\n\n// GetAnswersNum 返回用户的回答数\nfunc (user *User) GetAnswersNum() int {\n\treturn user.getProfileNum(\"answers-num\")\n}\n\n// GetPostsNum 返回用户的专栏文章数量\nfunc (user *User) GetPostsNum() int {\n\treturn user.getProfileNum(\"posts-num\")\n}\n\n// GetCollectionsNum 返回用户的收藏夹数量\nfunc (user *User) GetCollectionsNum() int {\n\treturn 
user.getProfileNum(\"collections-num\")\n}\n\n// GetLogsNum 返回用户公共编辑数量\nfunc (user *User) GetLogsNum() int {\n\treturn user.getProfileNum(\"logs-num\")\n}\n\n// GetFolloweesN 返回前 n 个用户关注的人，如果 n < 0，返回所有关注的人\nfunc (user *User) GetFolloweesN(n int) []*User {\n\tusers, err := user.getFolloweesOrFollowers(\"followees\", n)\n\tif err != nil {\n\t\tlogger.Error(\"获取 %s 关注的人失败：%s\", user.String(), err.Error())\n\t\treturn nil\n\t}\n\treturn users\n}\n\n// GetFollowees 返回用户关注的人\nfunc (user *User) GetFollowees() []*User {\n\treturn user.GetFolloweesN(-1)\n}\n\n// GetFollowersN 返回前 n 个粉丝，如果 n < 0，返回所有粉丝\nfunc (user *User) GetFollowersN(n int) []*User {\n\tusers, err := user.getFolloweesOrFollowers(\"followers\", n)\n\tif err != nil {\n\t\tlogger.Error(\"获取 %s 的粉丝失败：%s\", user.String(), err.Error())\n\t\treturn nil\n\t}\n\treturn users\n\n}\n\n// GetFollowers 返回用户的粉丝列表\nfunc (user *User) GetFollowers() []*User {\n\treturn user.GetFollowersN(-1)\n}\n\n// GetAsksN 返回用户前 n 个提问，如果 n < 0, 返回所有提问\nfunc (user *User) GetAsksN(n int) []*Question {\n\tif user.IsAnonymous() {\n\t\treturn nil\n\t}\n\n\ttotal := user.GetAsksNum()\n\tif n < 0 || n > total {\n\t\tn = total\n\t}\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\tpage := 1\n\tquestions := make([]*Question, 0, n)\n\tfor page < ((n-1)/pageSize + 2) {\n\t\tlink := urlJoin(user.Link, fmt.Sprintf(\"/asks?page=%d\", page))\n\t\tdoc, err := newDocumentFromURL(link)\n\t\tif err != nil {\n\t\t\treturn nil\n\t\t}\n\n\t\tdoc.Find(\"div#zh-profile-ask-list\").Children().Each(func(index int, sel *goquery.Selection) {\n\t\t\ta := sel.Find(\"a.question_link\")\n\t\t\ttitle := strip(a.Text())\n\t\t\thref, _ := a.Attr(\"href\")\n\t\t\tquestionLink := makeZhihuLink(href)\n\t\t\tthisQuestion := NewQuestion(questionLink, title)\n\n\t\t\t// 获取回答数\n\t\t\tanswersNum := reMatchInt(strip(sel.Find(\"div.meta\").Contents().Eq(4).Text()))\n\t\t\tthisQuestion.setAnswersNum(answersNum)\n\n\t\t\t// 获取关注数\n\t\t\tfollowersNum := 
reMatchInt(strip(sel.Find(\"div.meta\").Contents().Eq(6).Text()))\n\t\t\tthisQuestion.setFollowersNum(followersNum)\n\n\t\t\t// View count\n\t\t\tvisitTimes, _ := strconv.Atoi(strip(sel.Find(\"div.zm-profile-vote-num\").Text()))\n\t\t\tthisQuestion.setVisitTimes(visitTimes)\n\n\t\t\tquestions = append(questions, thisQuestion)\n\t\t})\n\n\t\tif n > 0 && len(questions) >= n {\n\t\t\treturn questions[:n]\n\t\t}\n\n\t\tpage++\n\t}\n\treturn questions\n}\n\n// GetAsks returns all questions asked by the user.\nfunc (user *User) GetAsks() []*Question {\n\treturn user.GetAsksN(-1)\n}\n\n// GetAnswersN returns the user's first n answers; if n < 0 it returns all of them.\n// It returns nil for an anonymous user, when n resolves to 0, or when a page fails to load.\nfunc (user *User) GetAnswersN(n int) []*Answer {\n\tif user.IsAnonymous() {\n\t\treturn nil\n\t}\n\n\ttotal := user.GetAnswersNum()\n\tif n < 0 || n > total {\n\t\tn = total\n\t}\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\tpage := 1\n\tanswers := make([]*Answer, 0, n)\n\tfor page < ((n-1)/pageSize + 2) {\n\t\tlink := urlJoin(user.Link, fmt.Sprintf(\"/answers?page=%d\", page))\n\t\tdoc, err := newDocumentFromURL(link)\n\t\tif err != nil {\n\t\t\treturn nil\n\t\t}\n\n\t\tdoc.Find(\"div#zh-profile-answer-list\").Children().Each(func(index int, sel *goquery.Selection) {\n\t\t\ta := sel.Find(\"a.question_link\")\n\t\t\tqTitle := strip(a.Text())\n\t\t\tanswerHref, _ := a.Attr(\"href\")\n\t\t\t// Skip children without an answer link: slicing with index -1 would panic.\n\t\t\tcut := strings.Index(answerHref, \"/answer\")\n\t\t\tif cut < 0 {\n\t\t\t\treturn\n\t\t\t}\n\t\t\tqLink := makeZhihuLink(answerHref[:cut])\n\t\t\tquestion := NewQuestion(qLink, qTitle)\n\t\t\tthisAnswer := NewAnswer(makeZhihuLink(answerHref), question, user)\n\n\t\t\tvoteText, _ := sel.Find(\"a.zm-item-vote-count\").Attr(\"data-votecount\")\n\t\t\tvote, _ := strconv.Atoi(voteText)\n\t\t\tthisAnswer.setUpvote(vote)\n\n\t\t\tanswers = append(answers, thisAnswer)\n\t\t})\n\n\t\tif n > 0 && len(answers) >= n {\n\t\t\treturn answers[:n]\n\t\t}\n\n\t\tpage++\n\t}\n\n\treturn answers\n}\n\n// GetAnswers returns all answers written by the user.\nfunc (user *User) GetAnswers() []*Answer {\n\treturn user.GetAnswersN(-1)\n}\n\n// GetCollectionsN returns the user's first n collections; if n < 0 it returns all of them.\nfunc (user *User) 
GetCollectionsN(n int) []*Collection {\n\tif user.IsAnonymous() {\n\t\treturn nil\n\t}\n\n\ttotal := user.GetCollectionsNum()\n\tif n < 0 || n > total {\n\t\tn = total\n\t}\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\tpage := 1\n\tcollections := make([]*Collection, 0, n)\n\tfor page < ((n-1)/pageSize + 2) {\n\t\tlink := urlJoin(user.Link, fmt.Sprintf(\"/collections?page=%d\", page))\n\t\tdoc, err := newDocumentFromURL(link)\n\t\tif err != nil {\n\t\t\treturn nil\n\t\t}\n\n\t\tdoc.Find(\"div.zm-profile-section-item\").Each(func(index int, sel *goquery.Selection) {\n\t\t\ta := sel.Find(\"a.zm-profile-fav-item-title\")\n\t\t\tcName := strip(a.Text())\n\t\t\thref, _ := a.Attr(\"href\")\n\t\t\tcLink := makeZhihuLink(href)\n\t\t\tthisCollection := NewCollection(cLink, cName, user)\n\t\t\tcollections = append(collections, thisCollection)\n\t\t})\n\n\t\tif n > 0 && len(collections) >= n {\n\t\t\treturn collections[:n]\n\t\t}\n\n\t\tpage++\n\t}\n\n\treturn collections\n}\n\n// GetCollections 返回用户的收藏夹\nfunc (user *User) GetCollections() []*Collection {\n\treturn user.GetCollectionsN(-1)\n}\n\n// GetFollowedTopicsN 返回用户前 n 个关注的话题，如果 n < 0，返回所有话题\nfunc (user *User) GetFollowedTopicsN(n int) []*Topic {\n\tif user.IsAnonymous() {\n\t\treturn nil\n\t}\n\n\ttotal := user.GetFollowedTopicsNum()\n\tif n < 0 || n > total {\n\t\tn = total\n\t}\n\tif n == 0 {\n\t\treturn nil\n\t}\n\n\tvar (\n\t\tlink       = urlJoin(user.Link, \"/topics\")\n\t\tgotDataNum = pageSize\n\t\toffset     = 0\n\t\ttopics     = make([]*Topic, 0, n)\n\t)\n\n\tform := url.Values{}\n\tform.Set(\"_xsrf\", user.GetXSRF())\n\tform.Set(\"start\", \"0\")\n\n\tfor gotDataNum == pageSize {\n\t\tform.Set(\"offset\", strconv.Itoa(offset))\n\t\tdoc, dataNum, err := newDocByNormalAjax(link, form)\n\t\tif err != nil {\n\t\t\treturn nil\n\t\t}\n\n\t\tdoc.Find(\"div.zm-profile-section-item\").Each(func(index int, sel *goquery.Selection) {\n\t\t\ttName := strip(sel.Find(\"strong\").Text())\n\t\t\ttHref, _ := 
sel.Find(\"a.zm-list-avatar-link\").Attr(\"href\")\n\t\t\tthisTopic := NewTopic(makeZhihuLink(tHref), tName)\n\t\t\ttopics = append(topics, thisTopic)\n\t\t})\n\n\t\tif n > 0 && len(topics) >= n {\n\t\t\treturn topics[:n]\n\t\t}\n\n\t\tgotDataNum = dataNum\n\t\toffset += gotDataNum\n\t}\n\n\treturn topics\n}\n\n// GetFollowedTopics 返回用户关注的话题\nfunc (user *User) GetFollowedTopics() []*Topic {\n\treturn user.GetFollowedTopicsN(-1)\n}\n\n// GetLikes 返回用户赞过的回答\nfunc (user *User) GetLikes() []*Answer {\n\tif user.IsAnonymous() {\n\t\treturn nil\n\t}\n\t// TODO\n\treturn nil\n}\n\n// GetVotedAnswers 是 GetLikes 的别名\nfunc (user *User) GetVotedAnswers() []*Answer {\n\treturn user.GetLikes()\n}\n\n// IsAnonymous 表示该用户是否匿名用户\nfunc (user *User) IsAnonymous() bool {\n\treturn isAnonymous(user.userID)\n}\n\nfunc (user *User) String() string {\n\tif user.IsAnonymous() {\n\t\treturn fmt.Sprintf(\"<User: %s>\", user.userID)\n\t}\n\treturn fmt.Sprintf(\"<User: %s - %s>\", user.userID, user.Link)\n}\n\nfunc (user *User) getProfile(cacheKey string) string {\n\tif user.IsAnonymous() {\n\t\treturn \"\"\n\t}\n\n\tif got, ok := user.getStringField(cacheKey); ok {\n\t\treturn got\n\t}\n\n\tdoc := user.Doc()\n\n\t// <span class=\"location item\" title=\"深圳\">深圳</span>\n\t// <span class=\"business item\" title=\"互联网\">...</span>\n\t// <span class=\"education item\" title=\"中山大学\">...</span>\n\tvalue, _ := doc.Find(fmt.Sprintf(\"span.%s\", cacheKey)).Attr(\"title\")\n\tuser.setField(cacheKey, value)\n\treturn value\n}\n\nfunc (user *User) getFollowersNumOrFolloweesNum(cacheKey string) int {\n\tif user.IsAnonymous() {\n\t\treturn 0\n\t}\n\n\tif got, ok := user.getIntField(cacheKey); ok {\n\t\treturn got\n\t}\n\n\tvar index int\n\tswitch cacheKey {\n\tcase \"followees-num\":\n\t\tindex = 0\n\tcase \"followers-num\":\n\t\tindex = 1\n\tdefault:\n\t\treturn 0\n\t}\n\n\tdoc := user.Doc()\n\n\t// <div class=\"zm-profile-side-following zg-clear\">\n\t//   <a class=\"item\" 
href=\"/people/jixin/followees\">\n\t//     <span class=\"zg-gray-normal\">关注了</span><br><strong>9190</strong><label> 人</label>\n\t//   </a>\n\t//   <a class=\"item\" href=\"/people/jixin/followers\">\n\t//     <span class=\"zg-gray-normal\">关注者</span><br><strong>754769</strong><label> 人</label>\n\t//   </a>\n\t// </div>\n\tvalue := doc.Find(\"div.zm-profile-side-following a strong\").Eq(index).Text()\n\tnum, _ := strconv.Atoi(value)\n\tuser.setField(cacheKey, num)\n\treturn num\n}\n\nfunc (user *User) getFollowedColumnsOrTopicsNum(cacheKey string) int {\n\tif user.IsAnonymous() {\n\t\treturn 0\n\t}\n\n\tif got, ok := user.getIntField(cacheKey); ok {\n\t\treturn got\n\t}\n\n\tvar selector string\n\tswitch cacheKey {\n\tcase \"followed-topics-num\":\n\t\tselector = \"div.zm-profile-side-topics\"\n\tcase \"followed-columns-num\":\n\t\tselector = \"div.zm-profile-side-columns\"\n\tdefault:\n\t\treturn 0\n\t}\n\n\tdoc := user.Doc()\n\tresult := 0\n\tsel := doc.Find(selector)\n\tif sel.Size() > 0 {\n\t\ttext := sel.Parent().Find(\"a.zg-link-litblue\").Find(\"strong\").Text()\n\t\tresult = reMatchInt(strip(text))\n\t}\n\tuser.setField(cacheKey, result)\n\treturn result\n}\n\nfunc (user *User) getAgreeOrThanksNum(cacheKey string) int {\n\tif user.IsAnonymous() {\n\t\treturn 0\n\t}\n\n\tvar selector string\n\tswitch cacheKey {\n\tcase \"agree-num\":\n\t\tselector = \"span.zm-profile-header-user-agree > strong\"\n\tcase \"thanks-num\":\n\t\tselector = \"span.zm-profile-header-user-thanks > strong\"\n\tdefault:\n\t\treturn 0\n\t}\n\n\tif got, ok := user.getIntField(cacheKey); ok {\n\t\treturn got\n\t}\n\n\tdoc := user.Doc()\n\n\t// <div class=\"zm-profile-header-operation zg-clear \">\n\t//   <div class=\"zm-profile-header-info-list\">\n\t//     <span class=\"zm-profile-header-info-title\">获得</span>\n\t//     <span class=\"zm-profile-header-user-agree\"><span class=\"zm-profile-header-icon\"></span><strong>68200</strong>赞同</span>\n\t//     <span 
class=\"zm-profile-header-user-thanks\"><span class=\"zm-profile-header-icon\"></span><strong>17511</strong>感谢</span>\n\t//   </div>\n\t// </div>\n\tnum, _ := strconv.Atoi(doc.Find(selector).Text())\n\tuser.setField(cacheKey, num)\n\treturn num\n}\n\nfunc (user *User) getProfileNum(cacheKey string) int {\n\tif user.IsAnonymous() {\n\t\treturn 0\n\t}\n\n\tif got, ok := user.getIntField(cacheKey); ok {\n\t\treturn got\n\t}\n\n\tvar index int\n\tswitch cacheKey {\n\tcase \"asks-num\":\n\t\tindex = 0\n\tcase \"answers-num\":\n\t\tindex = 1\n\tcase \"posts-num\":\n\t\tindex = 2\n\tcase \"collections-num\":\n\t\tindex = 3\n\tcase \"logs-num\":\n\t\tindex = 4\n\tdefault:\n\t\treturn 0\n\t}\n\n\tdoc := user.Doc()\n\n\t// <div class=\"profile-navbar clearfix\">\n\t//   <a class=\"item home first active\" href=\"/people/jixin\"><i class=\"icon icon-profile-tab-home\"></i><span class=\"hide-text\">主页</span></a>\n\t//   <a class=\"item \" href=\"/people/jixin/asks\"> 提问 <span class=\"num\">1336</span></a>\n\t//   <a class=\"item \" href=\"/people/jixin/answers\"> 回答 <span class=\"num\">785</span></a>\n\t//   <a class=\"item \" href=\"/people/jixin/posts\"> 专栏文章 <span class=\"num\">91</span></a>\n\t//   <a class=\"item \" href=\"/people/jixin/collections\"> 收藏 <span class=\"num\">44</span></a>\n\t//   <a class=\"item \" href=\"/people/jixin/logs\"> 公共编辑 <span class=\"num\">51471</span></a>\n\t// </div>\n\tvalue := doc.Find(\"div.profile-navbar\").Find(\"span.num\").Eq(index).Text()\n\tnum, _ := strconv.Atoi(value)\n\tuser.setField(cacheKey, num)\n\treturn num\n}\n\nfunc (user *User) getFolloweesOrFollowers(eeOrEr string, limit int) ([]*User, error) {\n\tif user.IsAnonymous() {\n\t\treturn nil, nil\n\t}\n\n\tif limit == 0 {\n\t\treturn nil, nil\n\t}\n\n\tvar (\n\t\treferer, ajaxURL string\n\t\toffset, totalNum int\n\t\thashID           = user.GetDataID()\n\t)\n\n\tif eeOrEr == \"followees\" {\n\t\treferer = urlJoin(user.Link, \"/followees\")\n\t\tajaxURL = 
makeZhihuLink(\"/node/ProfileFolloweesListV2\")\n\t\t// Cap the limit by the size of the list actually being fetched.\n\t\ttotalNum = user.GetFolloweesNum()\n\t} else {\n\t\treferer = urlJoin(user.Link, \"/followers\")\n\t\tajaxURL = makeZhihuLink(\"/node/ProfileFollowersListV2\")\n\t\ttotalNum = user.GetFollowersNum()\n\t}\n\n\tif limit < 0 || limit > totalNum {\n\t\tlimit = totalNum\n\t}\n\n\tform := url.Values{}\n\tform.Set(\"_xsrf\", user.GetXSRF())\n\tform.Set(\"method\", \"next\")\n\n\tusers := make([]*User, 0, limit)\n\tfor {\n\t\tform.Set(\"params\", fmt.Sprintf(`{\"offset\":%d,\"order_by\":\"created\",\"hash_id\":\"%s\"}`, offset, hashID))\n\t\tbody := strings.NewReader(form.Encode())\n\t\tresp, err := gSession.Ajax(ajaxURL, body, referer)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tresult := nodeListResult{}\n\t\terr = json.NewDecoder(resp.Body).Decode(&result)\n\t\t// Close right after decoding: a defer inside this loop would keep every page's body open until the function returns.\n\t\tresp.Body.Close()\n\t\tif err != nil {\n\t\t\tlogger.Error(\"json decode failed: %s\", err.Error())\n\t\t\treturn nil, err\n\t\t}\n\n\t\tfor _, userHTML := range result.Msg {\n\t\t\tthisUser, err := newUserFromHTML(userHTML)\n\t\t\tif err != nil {\n\t\t\t\treturn nil, err\n\t\t\t}\n\t\t\tusers = append(users, thisUser)\n\t\t\tif len(users) == limit {\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\n\t\t// Stop once enough users were collected, or when a short page shows this was the last one.\n\t\tif len(users) == limit || len(result.Msg) < pageSize {\n\t\t\tbreak\n\t\t} else {\n\t\t\toffset += pageSize\n\t\t}\n\t}\n\treturn users, nil\n}\n\n// setFollowersNum caches the followers count under \"followers-num\".\nfunc (user *User) setFollowersNum(value int) {\n\tuser.setField(\"followers-num\", value)\n}\n\n// setAsksNum caches the number of questions asked under \"asks-num\".\nfunc (user *User) setAsksNum(value int) {\n\tuser.setField(\"asks-num\", value)\n}\n\n// setAnswersNum caches the number of answers under \"answers-num\".\nfunc (user *User) setAnswersNum(value int) {\n\tuser.setField(\"answers-num\", value)\n}\n\n// setAgreeNum caches the upvote count under \"agree-num\".\nfunc (user *User) setAgreeNum(value int) {\n\tuser.setField(\"agree-num\", value)\n}\n\n// setBio caches the bio text under \"bio\".\nfunc (user *User) setBio(value string) {\n\tuser.setField(\"bio\", value)\n}\n\n// isAnonymous reports whether userID is one of the two placeholder names used for anonymous users.\nfunc isAnonymous(userID string) bool {\n\treturn userID == \"匿名用户\" || userID == \"知乎用户\"\n}\n\n// newUserFromHTML parses an HTML fragment and builds a User from it.\nfunc newUserFromHTML(html string) (*User, 
error) {\n\tdoc, err := goquery.NewDocumentFromReader(strings.NewReader(html))\n\tif err != nil {\n\t\tlogger.Error(\"NewDocumentFromReader failed: %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\treturn newUserFromSelector(doc.Selection), nil\n}\n\nfunc newUserFromSelector(sel *goquery.Selection) *User {\n\ta := sel.Find(\"h2.zm-list-content-title\").Find(\"a.zg-link\")\n\tif a.Size() == 0 {\n\t\t// 匿名用户，没有用户主页入口\n\t\treturn ANONYMOUS\n\t}\n\n\tuserId := strip(a.Text())\n\tlink, _ := a.Attr(\"href\")\n\n\tuser := NewUser(link, userId)\n\n\t// 获取 BIO\n\tbio := strip(sel.Find(\"div.zg-big-gray\").Text())\n\tuser.setField(\"bio\", bio)\n\n\t// 获取关注者数量\n\tfollowersNum := reMatchInt(strip(sel.Find(\"div.details\").Find(\"a\").Eq(0).Text()))\n\tuser.setFollowersNum(followersNum)\n\n\t// 获取提问数\n\tasksNum := reMatchInt(strip(sel.Find(\"div.details\").Find(\"a\").Eq(1).Text()))\n\tuser.setAsksNum(asksNum)\n\n\t// 获取回答数\n\tanswersNum := reMatchInt(strip(sel.Find(\"div.details\").Find(\"a\").Eq(2).Text()))\n\tuser.setAnswersNum(answersNum)\n\n\t// 获取赞同数\n\tagreeNum := reMatchInt(strip(sel.Find(\"div.details\").Find(\"a\").Eq(3).Text()))\n\tuser.setAgreeNum(agreeNum)\n\n\treturn user\n}\n"
  },
  {
    "path": "util.go",
    "content": "package zhihu\n\nimport (\n\t\"fmt\"\n\t\"io/ioutil\"\n\t\"net/http\"\n\t\"os\"\n\t\"os/exec\"\n\t\"path/filepath\"\n\t\"regexp\"\n\t\"runtime\"\n\t\"strconv\"\n\t\"strings\"\n\n\t\"github.com/PuerkitoBio/goquery\"\n\t\"github.com/fatih/color\"\n)\n\nconst (\n\tuserAgent    = \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36\"\n\tbaseZhihuURL = \"https://www.zhihu.com\"\n\tpageSize     = 20\n)\n\nvar (\n\treQuestionURL    = regexp.MustCompile(\"^(http|https)://www.zhihu.com/question/[0-9]{8}$\")\n\treCollectionURL  = regexp.MustCompile(\"^(http|https)://www.zhihu.com/collection/[0-9]{8,9}$\") // bugfix: for private collection\n\treTopicURL       = regexp.MustCompile(\"^(http|https)://www.zhihu.com/topic/[0-9]{8}$\")\n\treGetNumber      = regexp.MustCompile(`([0-9])+`)\n\treAvatarReplacer = regexp.MustCompile(`_(s|xs|m|l|xl|hd).(png|jpg)`)\n\treIsEmail        = regexp.MustCompile(`^[a-z0-9._%+\\-]+@[a-z0-9.\\-]+\\.[a-z]{2,4}$`)\n\tlogger           = Logger{Enabled: true}\n)\n\nfunc validQuestionURL(value string) bool {\n\treturn reQuestionURL.MatchString(value)\n}\n\nfunc validCollectionURL(value string) bool {\n\treturn reCollectionURL.MatchString(value)\n}\n\nfunc validTopicURL(value string) bool {\n\treturn reTopicURL.MatchString(value)\n}\n\nfunc reMatchInt(raw string) int {\n\tmatched := reGetNumber.FindStringSubmatch(raw)\n\tif len(matched) == 0 {\n\t\treturn 0\n\t}\n\trv, _ := strconv.Atoi(matched[0])\n\treturn rv\n}\n\nfunc validateAvatarSize(size string) bool {\n\tfor _, x := range []string{\"s\", \"xs\", \"m\", \"l\", \"xl\", \"hd\"} {\n\t\tif size == x {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\nfunc replaceAvatarSize(origin string, size string) string {\n\treturn reAvatarReplacer.ReplaceAllString(origin, fmt.Sprintf(\"_%s.$2\", size))\n}\n\nfunc isEmail(value string) bool {\n\treturn reIsEmail.MatchString(value)\n}\n\nfunc newHTTPHeaders(isXhr bool) 
http.Header {\n\theaders := make(http.Header)\n\theaders.Set(\"Accept\", \"*/*\")\n\theaders.Set(\"Connection\", \"keep-alive\")\n\theaders.Set(\"Host\", \"www.zhihu.com\")\n\theaders.Set(\"Origin\", \"http://www.zhihu.com\")\n\theaders.Set(\"Pragma\", \"no-cache\")\n\theaders.Set(\"User-Agent\", userAgent)\n\tif isXhr {\n\t\theaders.Set(\"X-Requested-With\", \"XMLHttpRequest\")\n\t}\n\treturn headers\n}\n\n// strip trims leading and trailing whitespace from s.\nfunc strip(s string) string {\n\treturn strings.TrimSpace(s)\n}\n\n// minInt returns the smaller of a and b.\nfunc minInt(a, b int) int {\n\tif a > b {\n\t\treturn b\n\t}\n\treturn a\n}\n\n// getCwd returns the current working directory and panics if it cannot be determined.\nfunc getCwd() string {\n\tcwd, err := os.Getwd()\n\tif err != nil {\n\t\tpanic(\"获取 CWD 失败：\" + err.Error())\n\t}\n\treturn cwd\n}\n\n// save writes content to filename with mode 0666.\nfunc save(filename string, content []byte) error {\n\treturn ioutil.WriteFile(filename, content, 0666)\n}\n\n// saveString writes the string content to filename with mode 0666.\nfunc saveString(filename string, content string) error {\n\treturn ioutil.WriteFile(filename, []byte(content), 0666)\n}\n\n// openCaptchaFile opens the captcha image with the platform's default viewer.\n// On an unrecognised OS it only prints a hint asking the user to open the file\n// manually and returns nil; otherwise it returns the viewer command's error.\nfunc openCaptchaFile(filename string) error {\n\tlogger.Info(\"调用外部程序渲染验证码……\")\n\tvar args []string\n\tswitch runtime.GOOS {\n\tcase \"linux\":\n\t\targs = []string{\"xdg-open\", filename}\n\tcase \"darwin\":\n\t\targs = []string{\"open\", filename}\n\tcase \"freebsd\":\n\t\targs = []string{\"open\", filename}\n\tcase \"netbsd\":\n\t\targs = []string{\"open\", filename}\n\tcase \"windows\":\n\t\tvar (\n\t\t\tcmd      = \"url.dll,FileProtocolHandler\"\n\t\t\trunDll32 = filepath.Join(os.Getenv(\"SYSTEMROOT\"), \"System32\", \"rundll32.exe\")\n\t\t)\n\t\targs = []string{runDll32, cmd, filename}\n\tdefault:\n\t\tfmt.Printf(\"无法确定操作系统，请自行打开验证码 %s 文件，并输入验证码。\", filename)\n\t\t// No viewer command is known here; args is empty, so indexing args[0] below would panic.\n\t\treturn nil\n\t}\n\n\tlogger.Info(\"Command: %s\", strings.Join(args, \" \"))\n\n\treturn exec.Command(args[0], args[1:]...).Run()\n}\n\n// readCaptchaInput prompts on stdout and reads the captcha text from stdin.\nfunc readCaptchaInput() string {\n\tvar captcha string\n\tfmt.Print(color.CyanString(\"请输入验证码：\"))\n\tfmt.Scanf(\"%s\", &captcha)\n\treturn captcha\n}\n\n// makeZhihuLink joins path onto baseZhihuURL.\nfunc makeZhihuLink(path string) string {\n\treturn 
urlJoin(baseZhihuURL, path)\n}\n\nfunc urlJoin(base, path string) string {\n\tif strings.HasSuffix(base, \"/\") {\n\t\tbase = strings.TrimRight(base, \"/\")\n\t}\n\tif strings.HasPrefix(path, \"/\") {\n\t\tpath = strings.TrimLeft(path, \"/\")\n\t}\n\treturn base + \"/\" + path\n}\n\n// newDocumentFromUrl 会请求给定的 url，并返回一个 goquery.Document 对象用于解析\nfunc newDocumentFromURL(url string) (*goquery.Document, error) {\n\tresp, err := gSession.Get(url)\n\tif err != nil {\n\t\tlogger.Error(\"请求 %s 失败：%s\", url, err.Error())\n\t\treturn nil, err\n\t}\n\n\tdoc, err := goquery.NewDocumentFromResponse(resp)\n\tif err != nil {\n\t\tlogger.Error(\"解析页面失败：%s\", err.Error())\n\t}\n\n\treturn doc, err\n}\n\n// ZhihuPage 是一个知乎页面，User, Question, Answer, Collection 的公共部分\ntype Page struct {\n\t// Link 是该页面的链接\n\tLink string\n\n\t// doc 是 HTML document\n\tdoc *goquery.Document\n\n\t// fields 是字段缓存，避免重复解析页面\n\tfields map[string]interface{}\n}\n\n// newZhihuPage 是 private 的构造器\nfunc newZhihuPage(link string) *Page {\n\treturn &Page{\n\t\tLink:   link,\n\t\tfields: make(map[string]interface{}),\n\t}\n}\n\n// Doc 用于获取当前问题页面的 HTML document，惰性求值\nfunc (page *Page) Doc() *goquery.Document {\n\tif page.doc != nil {\n\t\treturn page.doc\n\t}\n\n\terr := page.Refresh()\n\tif err != nil {\n\t\treturn nil\n\t}\n\n\treturn page.doc\n}\n\n// Refresh 会重新载入当前页面，获取最新的数据\nfunc (page *Page) Refresh() (err error) {\n\tpage.fields = make(map[string]interface{})    // 清空缓存\n\tpage.doc, err = newDocumentFromURL(page.Link) // 重载页面\n\treturn err\n}\n\n// GetXsrf 从当前页面内容抓取 xsrf 的值\nfunc (page *Page) GetXSRF() string {\n\tdoc := page.Doc()\n\tvalue, _ := doc.Find(`input[name=\"_xsrf\"]`).Attr(\"value\")\n\treturn value\n}\n\n// totalPages 获取总页数\nfunc (page *Page) totalPages() int {\n\treturn getTotalPages(page.Doc())\n}\n\nfunc (page *Page) setField(field string, value interface{}) {\n\tpage.fields[field] = value\n}\n\nfunc (page *Page) getIntField(field string) (value int, exists bool) {\n\tif got, ok := 
page.fields[field]; ok {\n\t\treturn got.(int), true\n\t}\n\treturn 0, false\n}\n\nfunc (page *Page) getStringField(field string) (value string, exists bool) {\n\tif got, ok := page.fields[field]; ok {\n\t\treturn got.(string), true\n\t}\n\treturn \"\", false\n}\n\nfunc getTotalPages(doc *goquery.Document) int {\n\tpager := doc.Find(\"div.zm-invite-pager\")\n\tif pager.Size() == 0 {\n\t\treturn 1\n\t}\n\ttext := pager.Find(\"span\").Eq(-2).Text()\n\tpages, _ := strconv.Atoi(text)\n\treturn pages\n}\n\n// nodeListResult 是形如 /node/XXListV2 这样的 Ajax 请求的 JSON 返回值\ntype nodeListResult struct {\n\tR   int      `json:\"r\"`   // 状态码，正确的情况为 0\n\tMsg []string `json:\"msg\"` // 回答内容，每个元素都是一段 HTML 片段\n}\n\n// normalAjaxResult 是页面内，目标 URL 和当前页面 URL 相同的 Ajax 请求返回的 JSON 数据\ntype normalAjaxResult struct {\n\tR   int           `json:\"r\"`\n\tMsg []interface{} `json:\"msg\"` // 两个元素，第一个为话题数量，第二个是 HTML 片段\n}\n"
  },
  {
    "path": "util_test.go",
    "content": "package zhihu\n\nimport (\n\t\"testing\"\n)\n\nfunc Test_validQuestionURL(t *testing.T) {\n\tioMap := map[string]bool{\n\t\t\"https://www.zhihu.com/question/37284137\":  true,\n\t\t\"http://www.zhihu.com/question/41114729\":   true,\n\t\t\"https://www.zhihu.com/question/41114729x\": false,\n\t\t\"https://www.zhihu.com/question/4111472\":   false,\n\t\t\"https://www.zhihu.com/\":                   false,\n\t}\n\n\tfor value, expectedResult := range ioMap {\n\t\tif validQuestionURL(value) != expectedResult {\n\t\t\tt.Error(\"validQuestionURL returns error result\")\n\t\t}\n\t}\n}\n"
  }
]