[
  {
    "path": ".gitignore",
    "content": ".idea/"
  },
  {
    "path": "README.md",
    "content": "# 正方教务系统助手\n该项目中的解析代码是2016年编写的，不保证可用于当前你学校的教务系统。代码简单，仅供参考。\n\n## 1.项目定义\n这个项目实现了正方教务系统的一套API：\n包括模拟登陆，个人信息查询，课表获取，成绩查询等等。\n随着API的不断完善与扩充，可以很方便地作为后台服务。\n比如教务系统手机客户端，桌面客户端，也可以作为某些需要学生课表、学生信息的特定应用的后台。\n\n同时这个项目定义为助手，可以继续开发其他便捷的工具：\n* 自动完成评教任务\n* 期末新的成绩公布，邮件通知\n* 分学期，分学年绩点计算\n* 公选课抢课功能\n\n## 2.项目结构\n1. ZhengFang.db 数据库\n2. model.py 模型层，通过ORM与数据库相连\n3. spider.py 业务层，网页爬虫，**项目入口**\n4. parseHtml.py 业务层，网页解析工具\n\n## 3.如何使用\n```bash\ngit clone git@github.com:nghuyong/ZhengFang.git\ncd ZhengFang/\npip install -r requirements.txt\n# 将spider.py文件中的教务系统地址，账号，密码替换成你自己的\npython spider.py\n```\n\n## 4.项目功能\n项目均以江南大学正方教务系统为例测试。\n\n### 4.1模拟登陆\n登陆有两种方式\n\n1. 默认登陆：\n\n需要处理验证码：将验证码下载到本地（code.jpg），人工识别验证码后，手动输入验证码，实现登陆。\n\n2. 绕过验证码登陆：\n\n由于正方教务系统的漏洞，若存在**default5.aspx**页面，可以不用验证码直接登陆。default3，default4等页面也可以都试一试。\n\n### 4.2个人信息获取\n通过教务系统个人信息页面抓取个人信息（真的有很多信息！），并持久化保存到数据库中。\n\n### 4.3课表获取\n通过抓取的个人信息读到学生入学的年份，再结合当前时间，就可以知道能够抓取到哪些学期的课表。\n\n例如学生2014年入学，当前是2016年8月，说明至少可以抓取到：\n\n2014-2015年度 第 1 学期\n\n2014-2015年度 第 2 学期\n\n2015-2016年度 第 1 学期\n\n2015-2016年度 第 2 学期\n\n这4个学期的课表。当然，由于现在是2016年8月，可能还可以抓取到2016-2017年度第 1 学期课表，可以试着抓取。\n\n将抓取到的课表持久化到数据库中。\n\n### 4.4成绩查询\n\n通过教务系统成绩查询页面中的“历年成绩”抓取课程成绩，并持久化到数据库中。\n\n### 4.5分学期分学年绩点计算\n\n根据抓取下来的成绩，通过（总的（学分*绩点）/总的学分）分别计算各个学期的绩点与各个学年的绩点。\n\n\n\n"
  },
  {
    "path": "model.py",
    "content": "# author: HuYong\n# coding=utf-8\n\nfrom peewee import *\n\ndb = SqliteDatabase('ZhengFang.db')\n\n\nclass Student(Model):\n    name = CharField(null=True)  # 姓名\n    urlName = CharField(null=True)  # url编码后的姓名\n    studentnumber = CharField(null=True)  # 学号\n    password = CharField(null=True)  # 教务系统密码\n    idCardNumber = CharField(null=True)  # 身份证号\n    sex = CharField(null=True)  # 性别\n    enterSchoolTime = CharField(null=True)  # 入学时间\n    birthsday = CharField(null=True)  # 出生日期\n    highschool = CharField(null=True)  # 毕业中学\n    nationality = CharField(null=True)  # 名族\n    hometown = CharField(null=True)  # 籍贯\n    politicsStatus = CharField(null=True)  # 政治面貌\n    college = CharField(null=True)  # 学院\n    major = CharField(null=True)  # 专业\n    classname = CharField(null=True)  # 所在班级\n    gradeClass = CharField(null=True)  # 年级\n\n    class Meta:\n        database = db\n\n\nclass ClassSchedule(Model):\n    student = ForeignKeyField(Student, related_name=\"classSchedule\")  # 学生\n    year = CharField(null=True)  # 年度\n    term = IntegerField(null=True)  # 学期\n\n    class Meta:\n        database = db\n\n\nclass Class(Model):\n    schedule = ForeignKeyField(ClassSchedule, related_name=\"classes\")  # 归属课表\n    name = CharField(null=True)  # 课程名称\n    type = CharField(null=True)  # 课程性质\n    timeInTheWeek = CharField(null=True)  # 星期几\n    timeInTheDay = CharField(null=True)  # 第几节课\n    timeInTheTerm = CharField(null=True)  # 上课周数\n    teacher = CharField(null=True)  # 授课教师\n    location = CharField(null=True)  # 授课地点\n\n    class Meta:\n        database = db\n\n\nclass YearGrade(Model):\n    student = ForeignKeyField(Student, related_name=\"grades\")  # 归属学生\n    year = CharField(null=True) # 学年\n    yearGPA = DoubleField(null=True)  # 学年GPA\n    yearCredit = DoubleField(null=True)  # 学年总学分\n\n    class Meta:\n        database = db\n\n\nclass TermGrade(Model):\n    year = ForeignKeyField(YearGrade,related_name=\"terms\")  # 归属学年\n    term = 
IntegerField(null=True) # 学期\n    termGPA = DoubleField(null=True) # 学期GPA\n    termCredit = DoubleField(null=True) #学期总学分\n\n    class Meta:\n        database = db\n\n\n\nclass OneLessonGrade(Model):\n    term = ForeignKeyField(TermGrade, related_name=\"lessonsGrades\")  # 归属学期\n    name = CharField(null=True)  # 课程名\n    type = CharField(null=True)  # 课程性质\n    credit = DoubleField(null=True)  # 学分\n    gradePoint = DoubleField(null=True)  # 绩点\n    grade = CharField(null=True)  # 成绩\n\n    class Meta:\n        database = db"
  },
  {
    "path": "parseHtml.py",
    "content": "# author: HuYong\n# coding=utf-8\nfrom bs4 import BeautifulSoup\n\n\n# 从网页中解析学生信息\ndef getStudentInfor(response):\n    html = response.content.decode(\"gb2312\")\n    soup = BeautifulSoup(html.decode(\"utf-8\"), \"html5lib\")\n    d = {}\n    d[\"studentnumber\"] = soup.find(id=\"xh\").string\n    d[\"idCardNumber\"] = soup.find(id=\"lbl_sfzh\").string\n    d[\"name\"] = soup.find(id=\"xm\").string\n    d[\"sex\"] = soup.find(id=\"lbl_xb\").string\n    d[\"enterSchoolTime\"] = soup.find(id=\"lbl_rxrq\").string\n    d[\"birthsday\"] = soup.find(id=\"lbl_csrq\").string\n    d[\"highschool\"] = soup.find(id=\"lbl_byzx\").string\n    d[\"nationality\"] = soup.find(id=\"lbl_mz\").string\n    d[\"hometown\"] = soup.find(id=\"lbl_jg\").string\n    d[\"politicsStatus\"] = soup.find(id=\"lbl_zzmm\").string\n    d[\"college\"] = soup.find(id=\"lbl_xy\").string\n    d[\"major\"] = soup.find(id=\"lbl_zymc\").string\n    d[\"classname\"] = soup.find(id=\"lbl_xzb\").string\n    d[\"gradeClass\"] = soup.find(id=\"lbl_dqszj\").string\n    return d\n\n\n# 从网页中解析课表信息\ndef getClassScheduleFromHtml(response):\n    html = response.content.decode(\"gb2312\",\"ignore\")\n    soup = BeautifulSoup(html.decode(\"utf-8\"), \"html5lib\")\n    __VIEWSTATE = soup.findAll(name=\"input\")[2][\"value\"]\n    trs = soup.find(id=\"Table1\").find_all('tr')\n    classes = []\n    for tr in trs:\n        tds = tr.find_all('td')\n        for td in tds:\n            if td.string == None:\n                oneClassKeys = [\"name\", \"type\", \"time\", \"teacher\", \"location\"]\n                oneClassValues = []\n                for child in td.children:\n                    if child.string != None:\n                        oneClassValues.append(child.string)\n                while len(oneClassValues) < len(oneClassKeys):\n                    oneClassValues.append(\"\")\n                oneClass = dict((key, value) for key, value in zip(oneClassKeys, oneClassValues))\n                
oneClass[\"timeInTheWeek\"] = oneClass[\"time\"].split(\"{\")[0][:2]\n                oneClass[\"timeInTheDay\"] = oneClass[\"time\"].split(\"{\")[0][2:]\n                oneClass[\"timeInTheTerm\"] = oneClass[\"time\"].split(\"{\")[1][:-1]\n                classes.append(oneClass)\n    return {\"classes\": classes, \"__VIEWSTATE\": __VIEWSTATE}\n\n\ndef get__VIEWSTATE(response):\n    html = response.content.decode(\"gb2312\")\n    soup = BeautifulSoup(html.decode(\"utf-8\"), \"html5lib\")\n    __VIEWSTATE = soup.findAll(name=\"input\")[2][\"value\"]\n    return __VIEWSTATE\n\n\ndef getGrade(response):\n    html = response.content.decode(\"gb2312\")\n    soup = BeautifulSoup(html.decode(\"utf-8\"), \"html5lib\")\n    trs = soup.find(id=\"Datagrid1\").findAll(\"tr\")[1:]\n    Grades = []\n    for tr in trs:\n        tds = tr.findAll(\"td\")\n        tds = tds[:2] + tds[3:5] + tds[6:9]\n        oneGradeKeys = [\"year\", \"term\", \"name\", \"type\", \"credit\",\"gradePonit\",\"grade\"]\n        oneGradeValues = []\n        for td in tds:\n            oneGradeValues.append(td.string)\n        oneGrade = dict((key, value) for key, value in zip(oneGradeKeys, oneGradeValues))\n        Grades.append(oneGrade)\n    return Grades\n\n"
  },
  {
    "path": "requirements.txt",
    "content": "peewee==3.2.2\nrequests==2.19.1\nbeautifulsoup4==4.6.3\nlxml==4.2.5\n"
  },
  {
    "path": "spider.py",
    "content": "# author: HuYong\n# coding=utf-8\nimport os\nimport sys\nimport urllib\nimport datetime\nimport requests\nfrom lxml import etree\nfrom parseHtml import getClassScheduleFromHtml, getStudentInfor, get__VIEWSTATE, getGrade\nfrom model import Student, db, ClassSchedule, Class, YearGrade, OneLessonGrade, TermGrade\n\n\nclass ZhengFangSpider:\n    def __init__(self, student, baseUrl=\"http://202.195.144.168/jndx\"):\n        self.student = student\n        self.baseUrl = baseUrl\n        self.session = requests.session()\n        self.session.headers[\n            'User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'\n\n    # 含验证码登陆\n    def login(self):\n        loginurl = self.baseUrl + \"/default2.aspx\"\n        response = self.session.get(loginurl)\n        selector = etree.HTML(response.content)\n        __VIEWSTATE = selector.xpath('//*[@id=\"form1\"]/input/@value')[0]\n        imgUrl = self.baseUrl + \"/CheckCode.aspx?\"\n        imgresponse = self.session.get(imgUrl, stream=True)\n        image = imgresponse.content\n        DstDir = os.getcwd() + \"\\\\\"\n        print(\"保存验证码到：\" + DstDir + \"code.jpg\" + \"\\n\")\n        try:\n            with open(DstDir + \"code.jpg\", \"wb\") as jpg:\n                jpg.write(image)\n        except IOError:\n            print(\"IO Error\\n\")\n        finally:\n            jpg.close()\n        code = input(\"验证码是：\")\n        RadioButtonList1 = u\"学生\".encode('gb2312', 'replace')\n        data = {\n            \"RadioButtonList1\": RadioButtonList1,\n            \"__VIEWSTATE\": __VIEWSTATE,\n            \"TextBox1\": self.student.studentnumber,\n            \"TextBox2\": self.student.password,\n            \"TextBox3\": code,\n            \"Button1\": \"\",\n            \"lbLanguage\": \"\"\n        }\n        # 登陆教务系统\n        Loginresponse = self.session.post(loginurl, data=data)\n        if Loginresponse.status_code 
== requests.codes.ok:\n            print(\"成功进入教务系统！\")\n\n    # 绕过验证码登陆\n    def loginWithOutCode(self):\n        loginurl = self.baseUrl + \"/default5.aspx\"\n        response = self.session.get(loginurl)\n        selector = etree.HTML(response.content)\n        __VIEWSTATE = selector.xpath('//*[@id=\"form1\"]/input/@value')[0]\n        RadioButtonList1 = u\"学生\".encode('gb2312', 'replace')\n        data = {\n            \"RadioButtonList1\": RadioButtonList1,\n            \"__VIEWSTATE\": __VIEWSTATE,\n            \"TextBox1\": self.student.studentnumber,\n            \"TextBox2\": self.student.password,\n            \"Button1\": \"\",\n        }\n        # 登陆教务系统\n        Loginresponse = self.session.post(loginurl, data=data)\n        if Loginresponse.status_code == requests.codes.ok:\n            print(\"成功进入教务系统！\")\n\n    # 获取学生基本信息\n    def getStudentBaseInfo(self):\n        self.session.headers['Referer'] = self.baseUrl + \"/xs_main.aspx?xh=\" + self.student.studentnumber\n        url = self.baseUrl + \"/xsgrxx.aspx?xh=\" + self.student.studentnumber + \"&\"\n        response = self.session.get(url)\n        d = getStudentInfor(response)\n        self.student.idCardNumber = d[\"idCardNumber\"]\n        self.student.name = d[\"name\"]\n        self.student.urlName = urllib.quote_plus(str(d[\"name\"].encode('gb2312')))\n        self.student.sex = d[\"sex\"]\n        self.student.enterSchoolTime = d[\"enterSchoolTime\"]\n        self.student.birthsday = d[\"birthsday\"]\n        self.student.highschool = d[\"highschool\"]\n        self.student.nationality = d[\"nationality\"]\n        self.student.hometown = d[\"hometown\"]\n        self.student.politicsStatus = d[\"politicsStatus\"]\n        self.student.college = d[\"college\"]\n        self.student.major = d[\"major\"]\n        self.student.classname = d[\"classname\"]\n        self.student.gradeClass = d[\"gradeClass\"]\n        self.student.save()\n        print(\"读取学生基本信息成功\")\n\n    # 获取学生课表\n    def 
getClassSchedule(self):\n        self.session.headers['Referer'] = self.baseUrl + \"/xs_main.aspx?xh=\" + self.student.studentnumber\n        url = self.baseUrl + \"/xskbcx.aspx?xh=\" + self.student.studentnumber + \"&xm=\" + self.student.urlName + \"&gnmkdm=N121603\"\n        response = self.session.get(url, allow_redirects=False)\n        __VIEWSTATE = getClassScheduleFromHtml(response)[\"__VIEWSTATE\"]\n        year = int(self.student.gradeClass)\n        term = 1\n        today = datetime.date.today()\n        while today.year > year or (today.year == year and today.month >= 7 and term == 1):\n            data = {\n                \"__EVENTTARGET\": \"xqd\",\n                \"__EVENTARGUMENT\": \"\",\n                \"__VIEWSTATE\": __VIEWSTATE,\n                \"xnd\": str(year) + \"-\" + str(year + 1),\n                \"xqd\": str(term),\n            }\n            self.session.headers['Referer'] = url\n            response = self.session.post(url, data)\n            print(\"正在获取\" + str(year) + \"-\" + str(year + 1) + \"学年第\" + str(term) + \"学期课表\")\n            classes = getClassScheduleFromHtml(response)[\"classes\"]\n            __VIEWSTATE = getClassScheduleFromHtml(response)[\"__VIEWSTATE\"]\n            classSchedule = ClassSchedule(student=self.student, year=str(year) + \"-\" + str(year + 1), term=term)\n            classSchedule.save()\n            for each in classes:\n                oneClass = Class(schedule=classSchedule, name=each[\"name\"], type=each[\"type\"],\n                                 timeInTheWeek=each[\"timeInTheWeek\"], timeInTheDay=each[\"timeInTheDay\"],\n                                 timeInTheTerm=each[\"timeInTheTerm\"],\n                                 teacher=each[\"teacher\"], location=each[\"location\"]\n                                 )\n                oneClass.save()\n            term = term + 1\n            if term > 2:\n                term = 1\n                year = year + 1\n        print(\"成功获取课表\")\n\n    
# 获取学生绩点\n    def getStudentGrade(self):\n        url = self.baseUrl + \"/xscjcx.aspx?xh=\" + self.student.studentnumber + \"&xm=\" + self.student.urlName + \"&gnmkdm=N121605\"\n        self.session.headers['Referer'] = self.baseUrl + \"/xs_main.aspx?xh=\" + self.student.studentnumber\n        response = self.session.get(url)\n        __VIEWSTATE = get__VIEWSTATE(response)\n        self.session.headers['Referer'] = url\n        data = {\n            \"__EVENTTARGET\": \"\",\n            \"__EVENTARGUMENT\": \"\",\n            \"__VIEWSTATE\": __VIEWSTATE,\n            'hidLanguage': \"\",\n            \"ddlXN\": \"\",\n            \"ddlXQ\": \"\",\n            \"ddl_kcxz\": \"\",\n            \"btn_zcj\": u\"历年成绩\".encode('gb2312', 'replace')\n        }\n        response = self.session.post(url, data=data)\n        grades = getGrade(response)\n        for onegrade in grades:\n            year = onegrade[\"year\"]\n            term = onegrade[\"term\"]\n            try:\n                yearGrade = YearGrade.get(YearGrade.year == year, YearGrade.student == self.student)\n            except:\n                yearGrade = YearGrade(year=year, student=self.student)\n                yearGrade.save()\n            try:\n                termGrade = TermGrade.get(TermGrade.year == yearGrade, TermGrade.term == int(term))\n            except:\n                termGrade = TermGrade(year=yearGrade, term=int(term))\n                termGrade.save()\n            try:\n                gradePoint = float(onegrade[\"gradePonit\"])\n            except:\n                gradePoint = None\n            oneLessonGrade = OneLessonGrade(term=termGrade, name=onegrade[\"name\"], type=onegrade[\"type\"],\n                                            credit=float(onegrade[\"credit\"]), gradePoint=gradePoint,\n                                            grade=onegrade[\"grade\"])\n            oneLessonGrade.save()\n        print(\"获取成绩成功\")\n\n    # 计算每学期，每学年的绩点\n    def 
calculateOneTermAndOneYearGPA(self):\n        years = self.student.grades\n        for year in years:\n            terms = year.terms\n            for term in terms:\n                sumCredit = 0.0\n                sumGrade = 0.0\n                grades = term.lessonsGrades\n                for grade in grades:\n                    if grade.gradePoint is None:\n                        continue\n                    sumGrade = sumGrade + (grade.credit * grade.gradePoint)\n                    sumCredit = sumCredit + grade.credit\n                termGPA = float('%.2f' % (sumGrade / sumCredit))\n                term.termGPA = termGPA\n                term.termCredit = sumCredit\n                term.save()\n            sumGrade = 0.0\n            sumCredit = 0.0\n            for term in terms:\n                sumGrade += term.termGPA * term.termCredit\n                sumCredit += term.termCredit\n            year.yearGPA = float('%.2f' % (sumGrade / sumCredit))\n            year.yearCredit = sumCredit\n            year.save()\n        print(\"绩点计算完毕\")\n\n\nif __name__ == \"__main__\":\n\n    # 连接数据库，建立数据表\n    try:\n        db.connect()\n        db.create_tables([Student, ClassSchedule, Class, YearGrade, TermGrade, OneLessonGrade])\n    except:\n        pass\n\n    # 查找学生，若不存在则创建账号\n    try:\n        student = Student.get(Student.studentnumber == \"xxxxxxxx\")\n    except Exception as e:\n        student = Student(studentnumber=\"xxxxxxxx\", password=\"xxxxxxxxx\")  # 用自己的教务系统账号密码\n        student.save()\n\n    spider = ZhengFangSpider(student, baseUrl=\"http://202.195.144.168/jndx\")  # 实例化爬虫\n    spider.loginWithOutCode()\n    if student.name is None:\n        spider.getStudentBaseInfo()\n    spider.getStudentGrade()\n    spider.calculateOneTermAndOneYearGPA()\n    spider.getClassSchedule()\n"
  }
]