Repository: YinAoXiong/12306_code_server
Branch: master
Commit: 10d7655e7b34
Files: 20
Total size: 17.1 KB

Directory structure:
gitextract_fqj4pppp/

├── .dockerignore
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── app.py
├── arm32v7-requirements.txt
├── arm64v8-requirements.txt
├── docker/
│   ├── amd64-Dockerfile
│   ├── arm32v7-Dockerfile
│   └── arm64v8-Dockerfile
├── download_model.sh
├── gunicorn.conf.py
├── qemu/
│   ├── qemu-aarch64-static
│   └── qemu-arm-static
├── requirements.txt
└── verify/
    ├── __init__.py
    ├── localVerifyCode.py
    ├── mlearn_for_image.py
    └── pretreatment.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
.git
.vscode
.idea
README.md
LICENSE

================================================
FILE: .gitignore
================================================
.vscode
venv
__pycache__/

================================================
FILE: .travis.yml
================================================
dist: bionic
sudo: required
services:
  - docker
language: bash

branches:
  only:
  - master

env:
  - IMAGE_NAME=12306_code_server DOCKER_CLI_EXPERIMENTAL=enabled

addons:
  apt:
    packages:
      - docker-ce

install:
  - docker --version
  - docker run --rm --privileged multiarch/qemu-user-static:register --reset
  - chmod +x qemu/qemu-aarch64-static && chmod +x qemu/qemu-arm-static

script:
  - |
    # Build one Docker image per target architecture.
    IMAGE_ARCH="amd64 arm32v7 arm64v8"
    COMMIT_SHA=$(git log -1 --pretty=format:"%H")
    LATEST_TAG_SHA=$(git rev-list --tags --max-count=1)

    # IMAGE_ARCH is expanded unquoted on purpose: it is a space-separated list.
    for THIS_ARCH in ${IMAGE_ARCH}; do
      docker build -t "$IMAGE_NAME:$THIS_ARCH" \
      -f "docker/$THIS_ARCH-Dockerfile" .
      # Operands are quoted because LATEST_TAG_SHA is empty when the repository
      # has no tags; unquoted, `[` would then fail with a syntax error.
      if [ "$COMMIT_SHA" == "$LATEST_TAG_SHA" ]; then
        docker tag "$IMAGE_NAME:$THIS_ARCH" "$IMAGE_NAME:$THIS_ARCH-$(git describe --abbrev=0 --tags)"
      fi
    done

    # Push the images to Docker Hub (not done for pull-request builds).
    if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then
      for THIS_ARCH in ${IMAGE_ARCH}; do
        # Tag each image under the Docker Hub account.
        docker tag "$IMAGE_NAME:$THIS_ARCH" "$DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH"
        if [ "$COMMIT_SHA" == "$LATEST_TAG_SHA" ]; then
          docker tag "$DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH" "$DOCKER_USERNAME/$IMAGE_NAME:$THIS_ARCH-$(git describe --abbrev=0 --tags)"
        fi
      done
      # The password is fed through stdin so it never appears in the argv.
      echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
      # NOTE(review): newer Docker CLIs push only :latest when no tag is given
      # and need --all-tags to push every tag - confirm against the CI version.
      docker push "$DOCKER_USERNAME/$IMAGE_NAME"
      # Publish a multi-arch manifest list under :latest.
      docker manifest create "$DOCKER_USERNAME/$IMAGE_NAME:latest" "$DOCKER_USERNAME/$IMAGE_NAME:amd64" "$DOCKER_USERNAME/$IMAGE_NAME:arm32v7" "$DOCKER_USERNAME/$IMAGE_NAME:arm64v8"
      docker manifest inspect "$DOCKER_USERNAME/$IMAGE_NAME:latest"
      docker manifest push "$DOCKER_USERNAME/$IMAGE_NAME:latest"
      # When HEAD is the most recently tagged commit, publish the same manifest
      # list under the tag name as well.
      if [ "$COMMIT_SHA" == "$LATEST_TAG_SHA" ]; then
        TAG=$(git describe --abbrev=0 --tags)
        docker manifest create "$DOCKER_USERNAME/$IMAGE_NAME:$TAG" "$DOCKER_USERNAME/$IMAGE_NAME:amd64" "$DOCKER_USERNAME/$IMAGE_NAME:arm32v7" "$DOCKER_USERNAME/$IMAGE_NAME:arm64v8"
        docker manifest inspect "$DOCKER_USERNAME/$IMAGE_NAME:$TAG"
        docker manifest push "$DOCKER_USERNAME/$IMAGE_NAME:$TAG"
      fi
    fi

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2019 尹傲雄

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# 12306验证码识别服务器

[![Uptime Robot status](https://img.shields.io/uptimerobot/status/m783635180-ab3d4772f147c2a3b92f8fe5)](https://stats.uptimerobot.com/oyKyLhjJQ/783635180) [![Uptime Robot ratio (30 days)](https://img.shields.io/uptimerobot/ratio/m783635180-ab3d4772f147c2a3b92f8fe5)](https://stats.uptimerobot.com/oyKyLhjJQ/783635180) [![Build Status](https://travis-ci.org/YinAoXiong/12306_code_server.svg?branch=master)](https://travis-ci.org/YinAoXiong/12306_code_server) [![Docker Pulls](https://img.shields.io/docker/pulls/yinaoxiong/12306_code_server)](https://hub.docker.com/r/yinaoxiong/12306_code_server)

该项目用于构建自托管的12306验证码识别服务器，本项目的全部模型和部分代码来自于此项目 [easy12306](https://github.com/zhaipro/easy12306)，使用该项目构建的api符合 [12306购票小助手](https://github.com/testerSunshine/12306)云打码格式可以直接调用。

提供一个部署好的线上版本, [https://12306.yinaoxiong.cn](https://12306.yinaoxiong.cn/),部署在腾讯云1核1G的学生机上不保证可用性,服务状态可以通过 [https://stats.uptimerobot.com/oyKyLhjJQ/783635180](https://stats.uptimerobot.com/oyKyLhjJQ/783635180)查看.


## 接口规范

### 请求

- Method: **POST**
- URL:  ```/verify/base64/```
- Headers: Content-Type: application/x-www-form-urlencoded
- Body: 
  imageFile=>Base64 encoding of the image

### 响应

- Headers：Content-Type:application/json
- Body：

```json
{
    "code": 0,
    "data": [
        "1",  //答案图片的编号数组
        "3"
    ],
    "massage": "识别成功"
}
{
    "code": 1,
    "data": [
    ],
    "massage": "识别失败"
}
```


## python版本支持

- [x] 3.5-3.7

## 平台支持

- [x] amd64
- [x] arm64v8
- [x] arm32v7

其中arm平台建议通过docker运行

## 部署

### docker部署(推荐)

使用docker可以使用如下命令快速部署:


  ```shell
  docker run -d -p 8080:80 --name 12306 yinaoxiong/12306_code_server
  ```

### docker-compose部署(推荐)


```yaml
version: "3"

services:
  code_12306:
    image: yinaoxiong/12306_code_server
    ports:
      - 5002:80 #可以根据需要修改端口
    environment:
      - WORKERS=1 # gunicorn workers 数量，默认为1，可以根据服务器配置自行调整
    restart: always
  
```

### 通过源码部署

1. 克隆并进入项目

   ```shell
   git clone https://github.com/YinAoXiong/12306_code_server.git
   cd 12306_code_server
   ```

2. 安装依赖 自行根据平台和python选择对应的tflite（下面的例子为amd64，python3.7，其他情况对应的下载地址见 [https://www.tensorflow.org/lite/guide/python](https://www.tensorflow.org/lite/guide/python)，可自行在requirements.txt中替换）

   ```shell
   pip3 install -r requirements.txt
   ```

3. 下载模型文件

    ```shell
    bash download_model.sh
    ```
    从GitHub下载慢的话可以选择执行下面的命令

    ```shell
    wget -c https://cdn.yinaoxiong.cn/models/image.model.tflite
    wget -c https://cdn.yinaoxiong.cn/models/text.model.tflite
    ```

4. 运行 默认workers为1，使用80端口，可以自行修改 gunicorn.conf.py

   ```shell
   gunicorn app:app -c gunicorn.conf.py
   ```

不推荐在arm平台上使用源码部署,依赖安装有些麻烦.

## 致谢

- [easy12306](https://github.com/zhaipro/easy12306) 提供项目运行的model
-  [12306购票小助手](https://github.com/testerSunshine/12306)源于该项目的一个issue
- ~~[tensorflow-on-arm](https://github.com/lhelontra/tensorflow-on-arm)提供arm上运行的tensorflow python包~~ v1.1版本后开始使用tflite而非keras


================================================
FILE: app.py
================================================
# -*- coding: utf-8 -*-
import io
import base64
import flask
import numpy as np
from PIL import Image, ImageFile
from verify import pretreatment
import tflite_runtime.interpreter as tflite

app = flask.Flask(__name__)
# Module-level handles for the two TFLite interpreters; they stay None until
# load_model() assigns them.
textModel = None
imgModel = None
# Tell Pillow to load truncated image files (added to resolve issue #10).
ImageFile.LOAD_TRUNCATED_IMAGES = True


@app.before_first_request
def load_model():
    '''
    Create the two TFLite interpreters and store them in the module globals.

    Registered with Flask's before_first_request hook. The model files
    'text.model.tflite' and 'image.model.tflite' are opened by relative path.
    :return: None
    '''
    global textModel, imgModel
    # Each global is assigned before its tensors are allocated, matching the
    # order in which the module state becomes visible.
    textModel = tflite.Interpreter('text.model.tflite')
    textModel.allocate_tensors()
    imgModel = tflite.Interpreter('image.model.tflite')
    imgModel.allocate_tensors()


def predict(model, input):
    '''
    Run one inference pass on an interpreter and return its first output.

    :param model: object exposing get_input_details, get_output_details,
                  set_tensor, invoke and get_tensor
    :param input: array-like data; converted with np.float32 before being fed
    :return: whatever get_tensor returns for the first output tensor
    '''
    # Look up both tensor indices before touching the interpreter state.
    in_index = model.get_input_details()[0]['index']
    out_index = model.get_output_details()[0]['index']
    model.set_tensor(in_index, np.float32(input))
    model.invoke()
    return model.get_tensor(out_index)


def base64_to_image(base64_code):
    '''
    Decode a base64-encoded image file into a BGR pixel array.

    :param base64_code: base64 text of an encoded image file
    :return: numpy array with three channels in B, G, R order
    '''
    # Decode the base64 payload into the raw bytes of the image file.
    img_data = base64.b64decode(base64_code)
    # Force three RGB channels. Without this, an RGBA, palette or grayscale
    # upload would produce an array with a different channel count, and the
    # channel flip below would no longer yield BGR.
    rgb_image = Image.open(io.BytesIO(img_data)).convert('RGB')
    img = np.asarray(rgb_image)
    # Reverse the last axis: RGB -> BGR.
    img = img[..., ::-1]

    return img


def get_text(img, offset=0):
    '''
    Cut the text region out of a captcha and turn it into a model input.

    :param img: the full captcha image (channels in B, G, R order)
    :param offset: horizontal shift handed to pretreatment.get_text
    :return: grayscale crop scaled by 1/255, shaped (1, height, width, 1)
    '''
    region = pretreatment.get_text(img, offset)
    # Weighted sum of the three channels gives a single-channel grayscale crop.
    gray = region[..., 0] * 0.114 + region[..., 1] * 0.587 + region[..., 2] * 0.299
    gray = gray / 255.0
    # Unpacking enforces a 2-D crop before the batch and channel axes are added.
    rows, cols = gray.shape
    return gray.reshape(1, rows, cols, 1)


def preprocess_input(x):
    '''
    Return a float32 copy of x with a fixed mean removed from each channel.

    :param x: numpy array whose last axis holds three channels
    :return: new float32 array; x itself is not modified
    '''
    # Per-channel means, listed in B, G, R order (the original note says the
    # images are expected to be in BGR format already).
    channel_means = [103.939, 116.779, 123.68]
    centered = x.astype('float32')
    # Subtract in place so the result keeps the float32 dtype.
    np.subtract(centered, channel_means, out=centered)
    return centered


@app.route('/verify/base64/', methods=['POST'])
def predict_verify():
    '''
    Solve a captcha posted as base64 and report which tiles match the prompt.

    Reads the form field 'imageFile', classifies the one or two prompt words
    with textModel, classifies every tile with imgModel, and collects the
    1-based positions of the tiles whose label is one of the prompt words.

    The route only accepts POST, so no extra method check is performed here.
    The progress line is built with str.format rather than an f-string so the
    module still parses on Python 3.5, which the README lists as supported.

    :return: dict with 'code' (0 when at least one tile matched, otherwise 1),
             'massage' (status text; the key spelling is part of the API) and
             'data' (list of matching tile positions as strings)
    '''
    # Class names; the index of each entry is the label id used by both models.
    verify_titles = ['打字机', '调色板', '跑步机', '毛线', '老虎', '安全帽', '沙包', '盘子', '本子', '药片', '双面胶', '龙舟', '红酒', '拖把', '卷尺',
                     '海苔', '红豆', '黑板', '热水袋', '烛台', '钟表', '路灯', '沙拉', '海报', '公交卡', '樱桃', '创可贴', '牌坊', '苍蝇拍', '高压锅',
                     '电线', '网球拍', '海鸥', '风铃', '订书机', '冰箱', '话梅', '排风机', '锅铲', '绿豆', '航母', '电子秤', '红枣', '金字塔', '鞭炮',
                     '菠萝', '开瓶器', '电饭煲', '仪表盘', '棉棒', '篮球', '狮子', '蚂蚁', '蜡烛', '茶盅', '印章', '茶几', '啤酒', '档案袋', '挂钟',
                     '刺绣',
                     '铃铛', '护腕', '手掌印', '锦旗', '文具盒', '辣椒酱', '耳塞', '中国结', '蜥蜴', '剪纸', '漏斗', '锣', '蒸笼', '珊瑚', '雨靴',
                     '薯条',
                     '蜜蜂', '日历', '口哨']

    # Read and pre-process the captcha.
    img = base64_to_image(flask.request.form['imageFile'])
    first_crop = get_text(img)
    imgs = preprocess_input(np.array(list(pretreatment._get_imgs(img))))

    # Recognise the first prompt word.
    first_word = verify_titles[predict(textModel, first_crop).argmax()]
    text_list = [first_word]

    # The second word starts further right the longer the first word is.
    if len(first_word) == 1:
        offset = 27
    elif len(first_word) == 2:
        offset = 47
    else:
        offset = 60
    second_crop = get_text(img, offset=offset)
    # Only classify the second crop when its mean value is below 0.95; a
    # brighter crop is skipped (presumably blank, i.e. no second word).
    if second_crop.mean() < 0.95:
        text_list.append(verify_titles[predict(textModel, second_crop).argmax()])

    print("题目为{}".format(text_list))

    # Classify every tile and keep the positions that match a prompt word.
    labels = predict(imgModel, imgs).argmax(axis=1)
    results = []
    for position, label in enumerate(labels, start=1):
        tile_name = verify_titles[label]
        print(position, tile_name)
        if tile_name in text_list:
            results.append(str(position))

    if results:
        return {'code': 0, 'massage': '识别成功', 'data': results}
    return {'code': 1, 'massage': '识别失败', 'data': results}


@app.route('/')
def hello_world():
    '''
    Answer requests to the site root with a fixed greeting.
    :return: the greeting text
    '''
    greeting = 'Hello World!'
    return greeting


# Start Flask's own server with its default settings when this file is run
# directly as a script.
if __name__ == '__main__':
    app.run()


================================================
FILE: arm32v7-requirements.txt
================================================
flask==1.1.1
Pillow>=6.2.2
gunicorn==19.9.0
gevent==1.4.0
numpy==1.17.4
https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_armv7l.whl

================================================
FILE: arm64v8-requirements.txt
================================================
flask==1.1.1
gunicorn==19.9.0
https://cdn.yinaoxiong.cn/wheels/numpy-1.17.4-cp37-cp37m-linux_aarch64.whl
https://cdn.yinaoxiong.cn/wheels/Pillow-7.1.2-cp37-cp37m-linux_aarch64.whl
https://cdn.yinaoxiong.cn/wheels/greenlet-0.4.15-cp37-cp37m-linux_aarch64.whl
https://cdn.yinaoxiong.cn/wheels/gevent-1.4.0-cp37-cp37m-linux_aarch64.whl
https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_aarch64.whl

================================================
FILE: docker/amd64-Dockerfile
================================================
# Image built from the plain python:3.7 base: installs the Python
# requirements, downloads the model files and starts the app with gunicorn.
FROM python:3.7-slim-buster

LABEL maintainer="Yin Aoxiong <yinaoxiong@gmail.com>" \
      reference="https://github.com/yinaoxiong/12306_code_server"

WORKDIR /app

# wget is used by download_model.sh below; the apt caches and package lists
# are removed in the same RUN step.
RUN set -ex && \
            apt-get update && \
            apt-get install -y wget && \
            apt-get clean && rm -rf /var/lib/apt/lists/*

# The requirements file is copied and installed before the rest of the source.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Fetch the two .tflite model files into /app.
COPY download_model.sh ./
RUN bash download_model.sh

COPY . .

# The service listens on port 80 (see bind in gunicorn.conf.py).
EXPOSE 80

CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"]


================================================
FILE: docker/arm32v7-Dockerfile
================================================
# 32-bit ARM variant of the image, based on arm32v7/python.
FROM arm32v7/python:3.7-slim-buster

LABEL maintainer="Yin Aoxiong <yinaoxiong@gmail.com>" \
      reference="https://github.com/yinaoxiong/12306_code_server"

WORKDIR /app

# Ship the static qemu user-mode emulator inside the image; presumably this is
# what lets the RUN steps execute on the non-ARM CI host, where .travis.yml
# registers qemu-user-static before building.
COPY qemu/qemu-arm-static /usr/bin/qemu-arm-static

# Shared libraries plus wget. wget is used by download_model.sh below; the
# libraries are presumably runtime dependencies of the prebuilt Pillow and
# numpy wheels - TODO confirm.
RUN set -ex && \
            apt-get update && \
            apt-get install -y libwebpdemux2 libzstd1 libopenjp2-7 libjbig0 libtiff5 liblcms2-2 libwebp6 libwebpmux3  \
            libatlas3-base libgfortran5 wget && \
            apt-get clean && rm -rf /var/lib/apt/lists/*

# piwheels is added as an extra package index for this install.
COPY arm32v7-requirements.txt ./
RUN pip install --no-cache-dir --extra-index-url=https://www.piwheels.org/simple -r arm32v7-requirements.txt

# Fetch the two .tflite model files into /app.
COPY download_model.sh ./
RUN bash download_model.sh

COPY . .

# The service listens on port 80 (see bind in gunicorn.conf.py).
EXPOSE 80

CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"]


================================================
FILE: docker/arm64v8-Dockerfile
================================================
# 64-bit ARM variant of the image, based on arm64v8/python.
FROM arm64v8/python:3.7-slim-buster

LABEL maintainer="Yin Aoxiong <yinaoxiong@gmail.com>" \
      reference="https://github.com/yinaoxiong/12306_code_server"

WORKDIR /app

# Ship the static qemu user-mode emulator inside the image; presumably this is
# what lets the RUN steps execute on the non-ARM CI host, where .travis.yml
# registers qemu-user-static before building.
COPY qemu/qemu-aarch64-static /usr/bin/qemu-aarch64-static

# Shared libraries plus wget. wget is used by download_model.sh below; the
# libraries are presumably runtime dependencies of the prebuilt wheels listed
# in arm64v8-requirements.txt - TODO confirm.
RUN set -ex && \
            apt-get update && \
            apt-get install -y libwebpdemux2 libzstd1 libopenjp2-7 libjbig0 libtiff5 liblcms2-2 libwebp6 libwebpmux3  \
            libopenblas-base libgfortran5 wget && \
            apt-get clean && rm -rf /var/lib/apt/lists/*

# The requirements file is copied and installed before the rest of the source.
COPY arm64v8-requirements.txt ./
RUN pip install --no-cache-dir -r arm64v8-requirements.txt

# Fetch the two .tflite model files into /app.
COPY download_model.sh ./
RUN bash download_model.sh

COPY . .

# The service listens on port 80 (see bind in gunicorn.conf.py).
EXPOSE 80

CMD ["gunicorn", "app:app", "-c", "gunicorn.conf.py"]


================================================
FILE: download_model.sh
================================================
#!/usr/bin/env bash
# Download the two TFLite model files of the v1.1 release into the current
# directory.
#
# Usage: bash download_model.sh
#
# Strict mode makes the script stop with a non-zero status as soon as one
# download fails. Without it the exit status was that of the last wget only,
# so a failed first download went unnoticed by callers.
set -euo pipefail

readonly RELEASE_URL="https://github.com/YinAoXiong/12306_code_server/releases/download/v1.1"

for model_file in text.model.tflite image.model.tflite; do
  # -c continues a partially downloaded file instead of starting over.
  wget -c "${RELEASE_URL}/${model_file}"
done


================================================
FILE: gunicorn.conf.py
================================================
import multiprocessing
import os

# Address and port gunicorn binds to: every interface, port 80.
bind = "0.0.0.0:80"
# Worker count: taken from the WORKERS environment variable, 1 when it is
# unset. A value read from the environment is a str, not an int.
workers = os.environ.get("WORKERS", 1)
# Worker implementation.
worker_class = "gevent"

================================================
FILE: requirements.txt
================================================
flask==1.1.1
Pillow>=6.2.2
gunicorn==19.9.0
gevent==1.4.0
numpy==1.17.4
https://dl.google.com/coral/python/tflite_runtime-1.14.0-cp37-cp37m-linux_x86_64.whl

================================================
FILE: verify/__init__.py
================================================


================================================
FILE: verify/localVerifyCode.py
================================================
# coding: utf-8
import TickerConfig

# The heavy dependencies are imported only when TickerConfig.AUTO_CODE_TYPE
# equals 2.
# NOTE(review): `os` is imported only inside this branch, yet PATH below uses
# it, so calling PATH raises NameError for any other AUTO_CODE_TYPE value.
if TickerConfig.AUTO_CODE_TYPE == 2:
    import base64
    import os
    import numpy as np
    from keras import models, backend
    import tensorflow as tf
    from verify import pretreatment
    from verify.mlearn_for_image import preprocess_input
    from io import BytesIO
    from PIL import Image

    # Default TensorFlow graph captured at import time; Verify.verify runs its
    # predictions inside `graph.as_default()`.
    graph = tf.get_default_graph()

# Turn a path relative to this file's directory into an absolute path.
PATH = lambda p: os.path.abspath(
    os.path.join(os.path.dirname(__file__), p)
)

# Not referenced anywhere else in this file.
TEXT_MODEL = ""
IMG_MODEL = ""


def get_text(img, offset=0):
    """Crop the text region of a captcha and convert it into a model input.

    The crop comes from pretreatment.get_text. Its three channels are combined
    with fixed weights (the original noted cv2.cvtColor(text,
    cv2.COLOR_BGR2GRAY) as the equivalent), scaled by 1/255 and reshaped to
    (1, height, width, 1).
    """
    region = pretreatment.get_text(img, offset)
    gray = region[..., 0] * 0.114 + region[..., 1] * 0.587 + region[..., 2] * 0.299
    gray = gray / 255.0
    # Unpacking enforces a 2-D crop before the batch and channel axes are added.
    rows, cols = gray.shape
    return gray.reshape(1, rows, cols, 1)


def base64_to_image(base64_code):
    """Decode a base64-encoded image file and return its pixels channel-reversed.

    The image is opened with PIL and its last axis is reversed, which turns
    RGB data into BGR order.
    """
    # Decode the base64 text, then let PIL parse the resulting bytes.
    raw_bytes = base64.b64decode(base64_code)
    pixels = np.asarray(Image.open(BytesIO(raw_bytes)))
    # Reversing the channel axis converts to BGR.
    return pixels[..., ::-1]


class Verify:
    """Captcha solver that keeps a text classifier and an image classifier.

    Both models are loaded with keras `models.load_model` from .h5 files
    located one directory above this file.
    """

    def __init__(self):
        # Empty strings mark the models as "not loaded yet".
        self.textModel = ""
        self.imgModel = ""
        self.loadImgModel()
        self.loadTextModel()

    def loadTextModel(self):
        """Load the text model once; later calls only print a notice."""
        if self.textModel:
            print("无需加载模型model.v2.0.h5")
            return
        self.textModel = models.load_model(PATH('../model.v2.0.h5'))

    def loadImgModel(self):
        """Load the image model unless it is already loaded."""
        if self.imgModel:
            return
        self.imgModel = models.load_model(PATH('../12306.image.model.h5'))

    def verify(self, fn):
        """Solve one captcha.

        :param fn: base64 text of the captcha image
        :return: list of 1-based tile positions (as strings) whose predicted
                 label is one of the recognised prompt words
        """
        # Class names; the index of each entry is the label id used by both models.
        verify_titles = ['打字机', '调色板', '跑步机', '毛线', '老虎', '安全帽', '沙包', '盘子', '本子', '药片', '双面胶', '龙舟', '红酒', '拖把', '卷尺',
                         '海苔', '红豆', '黑板', '热水袋', '烛台', '钟表', '路灯', '沙拉', '海报', '公交卡', '樱桃', '创可贴', '牌坊', '苍蝇拍', '高压锅',
                         '电线', '网球拍', '海鸥', '风铃', '订书机', '冰箱', '话梅', '排风机', '锅铲', '绿豆', '航母', '电子秤', '红枣', '金字塔', '鞭炮',
                         '菠萝', '开瓶器', '电饭煲', '仪表盘', '棉棒', '篮球', '狮子', '蚂蚁', '蜡烛', '茶盅', '印章', '茶几', '啤酒', '档案袋', '挂钟',
                         '刺绣',
                         '铃铛', '护腕', '手掌印', '锦旗', '文具盒', '辣椒酱', '耳塞', '中国结', '蜥蜴', '剪纸', '漏斗', '锣', '蒸笼', '珊瑚', '雨靴',
                         '薯条',
                         '蜜蜂', '日历', '口哨']
        # Read and pre-process the captcha.
        img = base64_to_image(fn)
        first_crop = get_text(img)
        tiles = preprocess_input(np.array(list(pretreatment._get_imgs(img))))

        # Recognise the first prompt word.
        self.loadTextModel()
        with graph.as_default():
            scores = self.textModel.predict(first_crop)
        first_word = verify_titles[scores.argmax()]
        text_list = [first_word]

        # The second word is located according to the length of the first one.
        offset = {1: 27, 2: 47}.get(len(first_word), 60)
        second_crop = get_text(img, offset=offset)
        # The second crop is classified only when its mean value is below 0.95.
        if second_crop.mean() < 0.95:
            with graph.as_default():
                scores = self.textModel.predict(second_crop)
            text_list.append(verify_titles[scores.argmax()])
        print("题目为{}".format(text_list))

        # Classify the tiles with the image model.
        self.loadImgModel()
        with graph.as_default():
            tile_scores = self.imgModel.predict(tiles)

        results = []
        for position, label in enumerate(tile_scores.argmax(axis=1), start=1):
            tile_name = verify_titles[label]
            print(position, tile_name)
            if tile_name in text_list:
                results.append(str(position))
        return results


if __name__ == '__main__':
    pass
    # verify("verify-img1.jpeg")


================================================
FILE: verify/mlearn_for_image.py
================================================
# coding: utf-8


def preprocess_input(x):
    """Return a float32 copy of x with a fixed mean removed from each channel.

    x is expected to expose numpy's `astype`; the input itself is left
    untouched because `astype` produces a new array.
    """
    # Per-channel means, listed in B, G, R order (the original note says the
    # images were read with cv2 and are therefore already BGR).
    channel_means = [103.939, 116.779, 123.68]
    centered = x.astype('float32')
    # In-place subtraction keeps the float32 dtype.
    centered -= channel_means
    return centered


================================================
FILE: verify/pretreatment.py
================================================
#! env python
# coding: utf-8
# 功能：对图像进行预处理，将文字部分单独提取出来
# 并存放到ocr目录下
# 文件名为原验证码文件的文件名


def get_text(img, offset=0):
    """Return the slice of img that holds the prompt text.

    Rows 3..21 are kept; the column window 120..176 is shifted right by
    `offset`.
    """
    left = 120 + offset
    right = 177 + offset
    return img[3:22, left:right]


def _get_imgs(img):
    interval = 5
    length = 67
    for x in range(40, img.shape[0] - length, interval + length):
        for y in range(interval, img.shape[1] - length, interval + length):
            yield img[x:x + length, y:y + length]