[
  {
    "path": ".idea/vcs.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"VcsDirectoryMappings\">\n    <mapping directory=\"$PROJECT_DIR$\" vcs=\"Git\" />\n  </component>\n</project>"
  },
  {
    "path": "README.md",
    "content": "# 全国工商企业信息查询 验证码破解\n全国工商企业信息查询 验证码破解 \n\n## 如果图片不能显示，请将以下内容加入到本地计算机的hosts文件中\n```\n\n192.30.253.112    github.com \n192.30.253.119    gist.github.com\n151.101.184.133    assets-cdn.github.com\n151.101.184.133    raw.githubusercontent.com\n151.101.184.133    gist.githubusercontent.com\n151.101.184.133    cloud.githubusercontent.com\n151.101.184.133    camo.githubusercontent.com\n151.101.184.133    avatars0.githubusercontent.com\n151.101.184.133    avatars1.githubusercontent.com\n151.101.184.133    avatars2.githubusercontent.com\n151.101.184.133    avatars3.githubusercontent.com\n151.101.184.133    avatars4.githubusercontent.com\n151.101.184.133    avatars5.githubusercontent.com\n151.101.184.133    avatars6.githubusercontent.com\n151.101.184.133    avatars7.githubusercontent.com\n151.101.184.133    avatars8.githubusercontent.com\n\n```\n\n## 项目演式\n![image](https://github.com/ever391/crack_gs/blob/master/crack.gif)\n\n\n## 使用说明：\n- 调用crack_slide_captcha.py文件\n- 实例化CrackSlide对象\n- 执行exec_crack.run方法 参数：is_display 是否显示， 值为True/False\n- 返回通过验证码以后的结果\n\n\n## 工商企业信息查询网自带机器学习识别特征，大量访问会造成行为特征识别,无法通过。\n\n## PS  。。。。。。：\n    第一次开源好用的东东，以后要多多分享\n    如果喜欢请Fork下我，  后期会再次针对进行优化，\n    如果有好的方法，欢迎共同维护！\n    Thank you!\n\n## 特别感谢：\n    iYgnohZ 影响了我，让我也走向开源！\n\n## 本方法仅供学习\n"
  },
  {
    "path": "crack_slide_captcha.py",
    "content": "# coding:utf8\nfrom selenium import webdriver\nfrom selenium.webdriver.support.ui import WebDriverWait\nfrom selenium.webdriver.common.action_chains import ActionChains\nfrom selenium.webdriver.common.desired_capabilities import DesiredCapabilities\ntry:\n    from Pillow import Image as image\nexcept ImportError:\n    from PIL import Image as image\nimport time,re, random\nimport requests\ntry:\n    from StringIO import StringIO\nexcept ImportError:\n    from io import StringIO\n\n\nclass CrackSlide(object):\n\n    def __init__(self):\n        self.agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'\n        self.headers = {'User-Agent': self.agent}\n        self.is_display = True\n\n    def get_browser(self):\n        if self.is_display == True:\n            self.browser = webdriver.Chrome()\n        else:\n            agent = \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27\"\n            dcap = DesiredCapabilities.PHANTOMJS\n            dcap['phantomjs.page.settings.userAgent'] = agent\n            self.browser = webdriver.PhantomJS(desired_capabilities=dcap)\n        self.browser.get(\"http://www.gsxt.gov.cn/\")\n        time.sleep(1)\n\n    def exec_crack(self, text=u'中国移动', is_display=True):\n        self.is_display = is_display\n        self.get_browser()\n        self.input_text(text=text)\n        self.submit()\n        self.waiting_element()\n        image1 = self.get_image(\"//div[@class='gt_cut_bg gt_show']/div\")\n        image2 = self.get_image(\"//div[@class='gt_cut_fullbg gt_show']/div\")\n        offset = self.get_diff_location(image1, image2)\n        track_list = self.get_track(offset)\n        self.drag_and_drop(track_list, offset)\n        time.sleep(2)\n        content =  self.browser.page_source\n        self.browser.delete_all_cookies()\n        self.browser.quit()\n        return content\n\n    def drag_and_drop(self, track_list, offset):\n        element = self.browser.find_element_by_xpath(\"//div[@class='gt_slider_knob gt_show']\")\n        location = element.location\n        y = location['y']\n        ActionChains(self.browser).click_and_hold(on_element=element).perform()\n        num = 30\n        count = 0\n\n        for track in track_list:\n\n            ActionChains(self.browser).move_to_element_with_offset(to_element=element, xoffset=22,\n                                                             yoffset=random.randint(380, 390)).perform()\n            time.sleep(0.01)\n\n        for track in track_list:\n\n            ActionChains(self.browser).move_to_element_with_offset(to_element=element, xoffset=-22,\n                                                             yoffset=random.randint(380, 390)).perform()\n            time.sleep(0.01)\n\n        for track in track_list:\n\n            ActionChains(self.browser).move_to_element_with_offset(to_element=element, xoffset=track + 22,\n                                                             yoffset=random.randint(380, 390)).perform()\n            count += 1\n            if count < num:\n                time.sleep(0.01)\n            else:\n                time.sleep(0.1)\n        for i in xrange(5):\n            ActionChains(self.browser).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=random.randint(380, 390)).perform()\n            time.sleep(0.1)\n        ActionChains(self.browser).release(on_element=element).perform()\n\n    def input_text(self, text, el_id='keyword'):\n        input_el = self.browser.find_element_by_id(el_id)\n        input_el.click()\n        input_el.send_keys(text)\n        time.sleep(2)\n\n    def submit(self, el_class='f18'):\n        submit_el = self.browser.find_element_by_class_name(el_class)\n        submit_el.click()\n        time.sleep(2)\n\n    def get_image(self, img_xpath):\n        background_images = self.browser.find_elements_by_xpath(img_xpath)\n        location_list = []\n        imageurl = ''\n        if self.is_display == True:\n            for background_image in background_images:\n                location = {}\n                location['x'] = int(re.findall(\"background-image: url\\(\\\"(.*)\\\"\\); background-position: (.*)px (.*)px;\",\n                                             background_image.get_attribute('style'))[0][1])\n                location['y'] = int(re.findall(\"background-image: url\\(\\\"(.*)\\\"\\); background-position: (.*)px (.*)px;\",\n                                             background_image.get_attribute('style'))[0][2])\n                imageurl = re.findall(\"background-image: url\\(\\\"(.*)\\\"\\); background-position: (.*)px (.*)px;\",\n                                      background_image.get_attribute('style'))[0][0]\n                location_list.append(location)\n        else:\n            for background_image in background_images:\n                location = {}\n                location['x'] = int(re.findall(\"background-image: url\\((.*)\\); background-position: (.*)px (.*)px;\",\n                                               background_image.get_attribute('style'))[0][1])\n                location['y'] = int(re.findall(\"background-image: url\\((.*)\\); background-position: (.*)px (.*)px;\",\n                                               background_image.get_attribute('style'))[0][2])\n                imageurl = re.findall(\"background-image: url\\((.*)\\); background-position: (.*)px (.*)px;\",\n                                      background_image.get_attribute('style'))[0][0]\n                location_list.append(location)\n\n        imageurl = imageurl.replace(\"webp\", \"jpg\")\n\n        session = requests.session()\n        response = session.get(imageurl, headers=self.headers, verify=False)\n        image_mix = StringIO(response.content)\n        image = self.get_merge_image(image_mix, location_list)\n        return image\n\n\n    def get_merge_image(self, image_mix, location_list):\n        im = image.open(image_mix)\n        im_list_upper=[]\n        im_list_down=[]\n        for location in location_list:\n            if location['y']==-58:\n                im_list_upper.append(im.crop((abs(location['x']), 58, abs(location['x'])+10, 166)))\n            if location['y']==0:\n                im_list_down.append(im.crop((abs(location['x']), 0, abs(location['x'])+10, 58)))\n        new_im = image.new('RGB', (260,116))\n        x_offset = 0\n        for im in im_list_upper:\n            new_im.paste(im, (x_offset,0))\n            x_offset += im.size[0]\n        x_offset = 0\n        for im in im_list_down:\n            new_im.paste(im, (x_offset,58))\n            x_offset += im.size[0]\n        return new_im\n\n    def is_similar(self, image1, image2, x, y):\n        pixel1 = image1.getpixel((x, y))\n        pixel2 = image2.getpixel((x, y))\n        for i in range(0, 3):\n            if abs(pixel1[i] - pixel2[i]) >= 50:\n                return False\n        return True\n\n    def get_diff_location(self, image1, image2):\n        for i in range(0, 260):\n            for j in range(0, 116):\n                if self.is_similar(image1, image2, i, j) == False:\n                    return i\n\n    def get_track(self, offset):\n        list = []\n        x = random.randint(3, 8)\n        while offset - x >= 5:\n            list.append(x)\n            offset = offset - x\n            x = random.randint(2, 3)\n        for i in range(offset):\n            list.append(1)\n        return list\n\n    def waiting_element(self):\n        WebDriverWait(self.browser, 30).until(\n            lambda the_driver: the_driver.find_element_by_xpath(\"//div[@class='gt_slider_knob gt_show']\").is_displayed())\n        WebDriverWait(self.browser, 30).until(\n            lambda the_driver: the_driver.find_element_by_xpath(\"//div[@class='gt_cut_bg gt_show']\").is_displayed())\n        WebDriverWait(self.browser, 30).until(\n            lambda the_driver: the_driver.find_element_by_xpath(\"//div[@class='gt_cut_fullbg gt_show']\").is_displayed())\n\nif __name__ == \"__main__\":\n    crack = CrackSlide()\n    content = crack.exec_crack(is_display=True)"
  },
  {
    "path": "requirements.txt",
    "content": "appdirs==1.4.3\nolefile==0.44\npackaging==16.8\nPillow==4.0.0\npyparsing==2.2.0\nrequests==2.13.0\nselenium==3.3.1\nsix==1.10.0\n"
  }
]