Repository: YinAoXiong/12306_code_server Branch: master Commit: 10d7655e7b34 Files: 20 Total size: 17.1 KB Directory structure: gitextract_fqj4pppp/ ├── .dockerignore ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── app.py ├── arm32v7-requirements.txt ├── arm64v8-requirements.txt ├── docker/ │ ├── amd64-Dockerfile │ ├── arm32v7-Dockerfile │ └── arm64v8-Dockerfile ├── download_model.sh ├── gunicorn.conf.py ├── qemu/ │ ├── qemu-aarch64-static │ └── qemu-arm-static ├── requirements.txt └── verify/ ├── __init__.py ├── localVerifyCode.py ├── mlearn_for_image.py └── pretreatment.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .git .vscode .idea README.md LICENSE ================================================ FILE: .gitignore ================================================ .vscode venv __pycache__/ ================================================ FILE: .travis.yml ================================================ dist: bionic sudo: required services: - docker language: bash branches: only: - master env: - IMAGE_NAME=12306_code_server DOCKER_CLI_EXPERIMENTAL=enabled addons: apt: packages: - docker-ce install: - docker --version - docker run --rm --privileged multiarch/qemu-user-static:register --reset - chmod +x qemu/qemu-aarch64-static && chmod +x qemu/qemu-arm-static script: - | # 构建docker镜像 IMAGE_ARCH="amd64 arm32v7 arm64v8" COMMIT_SHA=$(git log -1 --pretty=format:"%H") LATEST_TAG_SHA=$(git rev-list --tags --max-count=1) for THIS_ARCH in ${IMAGE_ARCH}; do docker build -t $IMAGE_NAME:$THIS_ARCH \ -f docker/$THIS_ARCH-Dockerfile . if [ $COMMIT_SHA == $LATEST_TAG_SHA ]; then docker tag $IMAGE_NAME:$THIS_ARCH $IMAGE_NAME:$THIS_ARCH-$(git describe --abbrev=0 --tags) fi done # 推送镜像到docker hub if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then for THIS_ARCH in ${IMAGE_ARCH}; do # 标记镜像 docker tag $IMAGE_NAME:$THIS_ARCH $DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH if [ $COMMIT_SHA == $LATEST_TAG_SHA ]; then docker tag $DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH $DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH-$(git describe --abbrev=0 --tags) fi done echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin docker push $DOCKER_USERNAME/$IMAGE_NAME docker manifest create $DOCKER_USERNAME/$IMAGE_NAME:latest $DOCKER_USERNAME/$IMAGE_NAME:amd64 $DOCKER_USERNAME/$IMAGE_NAME:arm32v7 $DOCKER_USERNAME/$IMAGE_NAME:arm64v8 docker manifest inspect $DOCKER_USERNAME/$IMAGE_NAME:latest docker manifest push $DOCKER_USERNAME/$IMAGE_NAME:latest if [ $COMMIT_SHA == $LATEST_TAG_SHA ]; then TAG=$(git describe --abbrev=0 --tags) docker manifest create $DOCKER_USERNAME/$IMAGE_NAME:$TAG $DOCKER_USERNAME/$IMAGE_NAME:amd64 $DOCKER_USERNAME/$IMAGE_NAME:arm32v7 $DOCKER_USERNAME/$IMAGE_NAME:arm64v8 docker manifest inspect $DOCKER_USERNAME/$IMAGE_NAME:$TAG docker manifest push $DOCKER_USERNAME/$IMAGE_NAME:$TAG fi fi ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 尹傲雄 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # 12306验证码识别服务器 [![Uptime Robot status](https://img.shields.io/uptimerobot/status/m783635180-ab3d4772f147c2a3b92f8fe5)](https://stats.uptimerobot.com/oyKyLhjJQ/783635180) [![Uptime Robot ratio (30 days)](https://img.shields.io/uptimerobot/ratio/m783635180-ab3d4772f147c2a3b92f8fe5)](https://stats.uptimerobot.com/oyKyLhjJQ/783635180) [![Build Status](https://travis-ci.org/YinAoXiong/12306_code_server.svg?branch=master)](https://travis-ci.org/YinAoXiong/12306_code_server) [![Docker Pulls](https://img.shields.io/docker/pulls/yinaoxiong/12306_code_server)](https://hub.docker.com/r/yinaoxiong/12306_code_server) 该项目用于构建自托管的12306验证码识别服务器,本项目的全部模型和部分代码来自于此项目 [easy12306](https://github.com/zhaipro/easy12306),使用该项目构建的api符合 [12306购票小助手](https://github.com/testerSunshine/12306)云打码格式可以直接调用。 提供一个部署好的线上版本, [https://12306.yinaoxiong.cn](https://12306.yinaoxiong.cn/),部署在腾讯云1核1G的学生机上不保证可用性,服务状态可以通过 [https://stats.uptimerobot.com/oyKyLhjJQ/783635180](https://stats.uptimerobot.com/oyKyLhjJQ/783635180)查看. ## 接口规范 ### 请求 - Method: **POST** - URL: ```/verify/base64/``` - Headers: Content-Type: application/x-www-form-urlencoded - Body: imageFile=>Base64 encoding of the image ### 响应 - Headers:Content-Type:application/json - Body: ```json { "code": 0, "data": [ "1", //答案图片的编号数组 "3" ], "massage": "识别成功" } { "code": 1, "data": [ ], "massage": "识别失败" } ``` ## python版本支持 - [x] 3.5-3.7 ## 平台支持 - [x] amd64 - [x] arm64v8 - [x] arm32v7 其中arm平台建议通过docker运行 ## 部署 ### docker部署(推荐) 使用docker可以使用如下命令快速部署: ```shell docker run -d -p 8080:80 --name 12306 yinaoxiong/12306_code_server ``` ### docker-compose部署(推荐) ```yaml version: "3" services: code_12306: image: yinaoxiong/12306_code_server ports: - 5002:80 #可以根据需要修改端口 environment: - WORKERS=1 #gunicorn works 默认为1可以根据服务器配置自行调整 restart: always ``` ### 通过源码部署 1. 克隆并进入项目 ```shell git clone https://github.com/YinAoXiong/12306_code_server.git cd 12306_code_server ``` 2. 安装依赖 自行根据平台和python选择对应的tflite(下面的例子为amd64,python3.7,其他情况对应的下载地址见 [https://www.tensorflow.org/lite/guide/python](https://www.tensorflow.org/lite/guide/python),可自行在requirements.txt中替换) ```shell pip3 install -r requirements.txt ``` 3. 下载模型文件 ```shell bash download_model.sh ``` 从GitHub下载慢的话可以选择执行下面的命令 ```shell wget -c https://cdn.yinaoxiong.cn/models/image.model.tflite wget -c https://cdn.yinaoxiong.cn/models/text.model.tflite ``` 4. 运行 默认workers为1,使用80端口,可以自行修改 gunicorn.conf ```shell gunicorn app:app -c gunicorn.conf.py ``` 不推荐在arm平台上使用源码部署,依赖安装有些麻烦. ## 致谢 - [easy12306](https://github.com/zhaipro/easy12306) 提供项目运行的model - [12306购票小助手](https://github.com/testerSunshine/12306)源于该项目的一个issue - ~~[tensorflow-on-arm](https://github.com/lhelontra/tensorflow-on-arm)提供arm上运行的tensorflow python包~~ v1.1版本后开始使用tflite而非keras ================================================ FILE: app.py ================================================ # -*- coding: utf-8 -*- import io import base64 import flask import numpy as np from PIL import Image, ImageFile from verify import pretreatment import tflite_runtime.interpreter as tflite app = flask.Flask(__name__) # 模型的全局变量 textModel = None imgModel = None # 设置加载截断的图片,解决issue #10 ImageFile.LOAD_TRUNCATED_IMAGES = True @app.before_first_request def load_model(): ''' 加载模型函数 :return: ''' global textModel global imgModel textModel = tflite.Interpreter( 'text.model.tflite') textModel.allocate_tensors() imgModel = tflite.Interpreter( 'image.model.tflite') imgModel.allocate_tensors() def predict(model, input): input_details = model.get_input_details() output_details = model.get_output_details() model.set_tensor(input_details[0]['index'], np.float32(input)) model.invoke() result = model.get_tensor(output_details[0]['index']) return result def base64_to_image(base64_code): ''' :param base64_code: base64编码的图片 :return: bgr格式的图片 ''' # base64解码 img_data = base64.b64decode(base64_code) # 读取图片 img = np.asarray(Image.open(io.BytesIO(img_data))) # 转换为bgr格式 img = img[..., ::-1] return img def get_text(img, offset=0): ''' 得到图片中文字的部分 :param img: 原始图像 :param offset: :return: 文字部分的灰度图像 ''' text = pretreatment.get_text(img, offset) text = text[..., 0] * 0.114 + text[..., 1] * 0.587 + text[ ..., 2] * 0.299 text = text / 255.0 h, w = text.shape text.shape = (1, h, w, 1) return text def preprocess_input(x): x = x.astype('float32') # 我是用cv2来读取的图片,其已经是BGR格式了 mean = [103.939, 116.779, 123.68] x -= mean return x @app.route('/verify/base64/', methods=['POST']) def predict_verify(): verify_titles = ['打字机', '调色板', '跑步机', '毛线', '老虎', '安全帽', '沙包', '盘子', '本子', '药片', '双面胶', '龙舟', '红酒', '拖把', '卷尺', '海苔', '红豆', '黑板', '热水袋', '烛台', '钟表', '路灯', '沙拉', '海报', '公交卡', '樱桃', '创可贴', '牌坊', '苍蝇拍', '高压锅', '电线', '网球拍', '海鸥', '风铃', '订书机', '冰箱', '话梅', '排风机', '锅铲', '绿豆', '航母', '电子秤', '红枣', '金字塔', '鞭炮', '菠萝', '开瓶器', '电饭煲', '仪表盘', '棉棒', '篮球', '狮子', '蚂蚁', '蜡烛', '茶盅', '印章', '茶几', '啤酒', '档案袋', '挂钟', '刺绣', '铃铛', '护腕', '手掌印', '锦旗', '文具盒', '辣椒酱', '耳塞', '中国结', '蜥蜴', '剪纸', '漏斗', '锣', '蒸笼', '珊瑚', '雨靴', '薯条', '蜜蜂', '日历', '口哨'] if flask.request.method == 'POST': # 读取并预处理验证码 img = flask.request.form['imageFile'] img = base64_to_image(img) text = get_text(img) imgs = np.array(list(pretreatment._get_imgs(img))) imgs = preprocess_input(imgs) text_list = [] label = predict(textModel, text) label = label.argmax() text = verify_titles[label] text_list.append(text) # 获取下一个词 # 根据第一个词的长度来定位第二个词的位置 if len(text) == 1: offset = 27 elif len(text) == 2: offset = 47 else: offset = 60 text = get_text(img, offset=offset) if text.mean() < 0.95: label = predict(textModel, text) label = label.argmax() text = verify_titles[label] text_list.append(text) print(f"题目为{text_list}") labels = predict(imgModel, imgs) labels = labels.argmax(axis=1) results = [] for pos, label in enumerate(labels): l = verify_titles[label] print(pos + 1, l) if l in text_list: results.append(str(pos + 1)) if(len(results) != 0): return {'code': 0, 'massage': '识别成功', 'data': results} else: return {'code': 1, 'massage': '识别失败', 'data': results} @app.route('/') def hello_world(): return 'Hello World!' if __name__ == '__main__': app.run() ================================================ FILE: arm32v7-requirements.txt ================================================ flask==1.1.1 Pillow>=6.2.2 gunicorn==19.9.0 gevent==1.4.0 numpy==1.17.4 https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_armv7l.whl ================================================ FILE: arm64v8-requirements.txt ================================================ flask==1.1.1 gunicorn==19.9.0 https://cdn.yinaoxiong.cn/wheels/numpy-1.17.4-cp37-cp37m-linux_aarch64.whl https://cdn.yinaoxiong.cn/wheels/Pillow-7.1.2-cp37-cp37m-linux_aarch64.whl https://cdn.yinaoxiong.cn/wheels/greenlet-0.4.15-cp37-cp37m-linux_aarch64.whl https://cdn.yinaoxiong.cn/wheels/gevent-1.4.0-cp37-cp37m-linux_aarch64.whl https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_aarch64.whl ================================================ FILE: docker/amd64-Dockerfile ================================================ FROM python:3.7-slim-buster LABEL maintainer="Yin Aoxiong " \ reference="https://github.com/yinaoxiong/12306_code_server" WORKDIR /app RUN set -ex && \ apt-get update && \ apt-get install -y wget && \ apt-get clean && rm -rf /var/lib/apt/lists/* COPY requirements.txt ./ RUN pip install --no-cache-dir -r requirements.txt COPY download_model.sh ./ RUN bash download_model.sh COPY . . # 服务运行在80端口 EXPOSE 80 CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"] ================================================ FILE: docker/arm32v7-Dockerfile ================================================ FROM arm32v7/python:3.7-slim-buster LABEL maintainer="Yin Aoxiong " \ reference="https://github.com/yinaoxiong/12306_code_server" WORKDIR /app COPY qemu/qemu-arm-static /usr/bin/qemu-arm-static RUN set -ex && \ apt-get update && \ apt-get install -y libwebpdemux2 libzstd1 libopenjp2-7 libjbig0 libtiff5 liblcms2-2 libwebp6 libwebpmux3 \ libatlas3-base libgfortran5 wget && \ apt-get clean && rm -rf /var/lib/apt/lists/* COPY arm32v7-requirements.txt ./ RUN pip install --no-cache-dir --extra-index-url=https://www.piwheels.org/simple -r arm32v7-requirements.txt COPY download_model.sh ./ RUN bash download_model.sh COPY . . # 服务运行在80端口 EXPOSE 80 CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"] ================================================ FILE: docker/arm64v8-Dockerfile ================================================ FROM arm64v8/python:3.7-slim-buster LABEL maintainer="Yin Aoxiong " \ reference="https://github.com/yinaoxiong/12306_code_server" WORKDIR /app COPY qemu/qemu-aarch64-static /usr/bin/qemu-aarch64-static RUN set -ex && \ apt-get update && \ apt-get install -y libwebpdemux2 libzstd1 libopenjp2-7 libjbig0 libtiff5 liblcms2-2 libwebp6 libwebpmux3 \ libopenblas-base libgfortran5 wget && \ apt-get clean && rm -rf /var/lib/apt/lists/* COPY arm64v8-requirements.txt ./ RUN pip install --no-cache-dir -r arm64v8-requirements.txt COPY download_model.sh ./ RUN bash download_model.sh COPY . . # 服务运行在80端口 EXPOSE 80 CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"] ================================================ FILE: download_model.sh ================================================ wget -c https://github.com/YinAoXiong/12306_code_server/releases/download/v1.1/text.model.tflite wget -c https://github.com/YinAoXiong/12306_code_server/releases/download/v1.1/image.model.tflite ================================================ FILE: gunicorn.conf.py ================================================ import multiprocessing import os bind = "0.0.0.0:80" workers = os.getenv("WORKERS",1) worker_class = "gevent" ================================================ FILE: requirements.txt ================================================ flask==1.1.1 Pillow>=6.2.2 gunicorn==19.9.0 gevent==1.4.0 numpy==1.17.4 https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_x86_64.whl ================================================ FILE: verify/__init__.py ================================================ ================================================ FILE: verify/localVerifyCode.py ================================================ # coding: utf-8 import TickerConfig if TickerConfig.AUTO_CODE_TYPE == 2: import base64 import os import numpy as np from keras import models, backend import tensorflow as tf from verify import pretreatment from verify.mlearn_for_image import preprocess_input from io import BytesIO from PIL import Image graph = tf.get_default_graph() PATH = lambda p: os.path.abspath( os.path.join(os.path.dirname(__file__), p) ) TEXT_MODEL = "" IMG_MODEL = "" def get_text(img, offset=0): text = pretreatment.get_text(img, offset) text = text[..., 0] * 0.114 + text[..., 1] * 0.587 + text[ ..., 2] * 0.299 # text = cv2.cvtColor(text, cv2.COLOR_BGR2GRAY) text = text / 255.0 h, w = text.shape text.shape = (1, h, w, 1) return text def base64_to_image(base64_code): # base64解码 img_data = base64.b64decode(base64_code) # 读取图片 img = np.asarray(Image.open(BytesIO(img_data))) # 转换为bgr格式 img = img[..., ::-1] return img class Verify: def __init__(self): self.textModel = "" self.imgModel = "" self.loadImgModel() self.loadTextModel() def loadTextModel(self): if not self.textModel: self.textModel = models.load_model(PATH('../model.v2.0.h5')) else: print("无需加载模型model.v2.0.h5") def loadImgModel(self): if not self.imgModel: self.imgModel = models.load_model(PATH('../12306.image.model.h5')) def verify(self, fn): verify_titles = ['打字机', '调色板', '跑步机', '毛线', '老虎', '安全帽', '沙包', '盘子', '本子', '药片', '双面胶', '龙舟', '红酒', '拖把', '卷尺', '海苔', '红豆', '黑板', '热水袋', '烛台', '钟表', '路灯', '沙拉', '海报', '公交卡', '樱桃', '创可贴', '牌坊', '苍蝇拍', '高压锅', '电线', '网球拍', '海鸥', '风铃', '订书机', '冰箱', '话梅', '排风机', '锅铲', '绿豆', '航母', '电子秤', '红枣', '金字塔', '鞭炮', '菠萝', '开瓶器', '电饭煲', '仪表盘', '棉棒', '篮球', '狮子', '蚂蚁', '蜡烛', '茶盅', '印章', '茶几', '啤酒', '档案袋', '挂钟', '刺绣', '铃铛', '护腕', '手掌印', '锦旗', '文具盒', '辣椒酱', '耳塞', '中国结', '蜥蜴', '剪纸', '漏斗', '锣', '蒸笼', '珊瑚', '雨靴', '薯条', '蜜蜂', '日历', '口哨'] # 读取并预处理验证码 img = base64_to_image(fn) text = get_text(img) imgs = np.array(list(pretreatment._get_imgs(img))) imgs = preprocess_input(imgs) text_list = [] # 识别文字 self.loadTextModel() global graph with graph.as_default(): label = self.textModel.predict(text) label = label.argmax() text = verify_titles[label] text_list.append(text) # 获取下一个词 # 根据第一个词的长度来定位第二个词的位置 if len(text) == 1: offset = 27 elif len(text) == 2: offset = 47 else: offset = 60 text = get_text(img, offset=offset) if text.mean() < 0.95: with graph.as_default(): label = self.textModel.predict(text) label = label.argmax() text = verify_titles[label] text_list.append(text) print("题目为{}".format(text_list)) # 加载图片分类器 self.loadImgModel() with graph.as_default(): labels = self.imgModel.predict(imgs) labels = labels.argmax(axis=1) results = [] for pos, label in enumerate(labels): l = verify_titles[label] print(pos + 1, l) if l in text_list: results.append(str(pos + 1)) return results if __name__ == '__main__': pass # verify("verify-img1.jpeg") ================================================ FILE: verify/mlearn_for_image.py ================================================ # coding: utf-8 def preprocess_input(x): x = x.astype('float32') # 我是用cv2来读取的图片,其已经是BGR格式了 mean = [103.939, 116.779, 123.68] x -= mean return x ================================================ FILE: verify/pretreatment.py ================================================ #! env python # coding: utf-8 # 功能:对图像进行预处理,将文字部分单独提取出来 # 并存放到ocr目录下 # 文件名为原验证码文件的文件名 def get_text(img, offset=0): # 得到图像中的文本部分 return img[3:22, 120 + offset:177 + offset] def _get_imgs(img): interval = 5 length = 67 for x in range(40, img.shape[0] - length, interval + length): for y in range(interval, img.shape[1] - length, interval + length): yield img[x:x + length, y:y + length]