Repository: ever391/crack_gs Branch: master Commit: 8958456e9638 Files: 4 Total size: 9.2 KB Directory structure: gitextract_74bz74kt/ ├── .idea/ │ └── vcs.xml ├── README.md ├── crack_slide_captcha.py └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .idea/vcs.xml ================================================ ================================================ FILE: README.md ================================================ # 全国工商企业信息查询 验证码破解 全国工商企业信息查询 验证码破解 ## 如果图片不能显示,请将以下内容加入到本地计算机的hosts文件中 ``` 192.30.253.112 github.com 192.30.253.119 gist.github.com 151.101.184.133 assets-cdn.github.com 151.101.184.133 raw.githubusercontent.com 151.101.184.133 gist.githubusercontent.com 151.101.184.133 cloud.githubusercontent.com 151.101.184.133 camo.githubusercontent.com 151.101.184.133 avatars0.githubusercontent.com 151.101.184.133 avatars1.githubusercontent.com 151.101.184.133 avatars2.githubusercontent.com 151.101.184.133 avatars3.githubusercontent.com 151.101.184.133 avatars4.githubusercontent.com 151.101.184.133 avatars5.githubusercontent.com 151.101.184.133 avatars6.githubusercontent.com 151.101.184.133 avatars7.githubusercontent.com 151.101.184.133 avatars8.githubusercontent.com ``` ## 项目演示 ![image](https://github.com/ever391/crack_gs/blob/master/crack.gif) ## 使用说明: - 调用crack_slide_captcha.py文件 - 实例化CrackSlide对象 - 执行exec_crack方法 参数:is_display 是否显示, 值为True/False - 返回通过验证码以后的结果 ## 工商企业信息查询网自带机器学习识别特征,大量访问会造成行为特征识别,无法通过。 ## PS 。。。。。。: 第一次开源好用的东东,以后要多多分享 如果喜欢请Fork下我, 后期会再次针对进行优化, 如果有好的方法,欢迎共同维护! Thank you! ## 特别感谢: iYgnohZ 影响了我,让我也走向开源! 
# coding:utf8
# ================================================
# FILE: crack_slide_captcha.py
# ================================================
# README note carried over from the repository dump: this method is provided
# for learning purposes only.
"""Selenium-driven solver for the slide captcha shown on www.gsxt.gov.cn.

The flow implemented by :class:`CrackSlide` is: open the site, type a search
keyword, submit, wait for the captcha widget, rebuild the two shuffled
captcha images, find the first column where they differ, and drag the slider
knob by that amount.
"""
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
try:
    from Pillow import Image as image
except ImportError:
    from PIL import Image as image
import time
import re
import random
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
from io import BytesIO


class CrackSlide(object):
    """Automates the search form and slide captcha of http://www.gsxt.gov.cn/.

    Typical use::

        content = CrackSlide().exec_crack(text=u'...', is_display=True)
    """

    # Geometry of the reassembled captcha image, as hard-coded by the
    # original implementation: two rows of 10px-wide tiles.
    _IMAGE_SIZE = (260, 116)
    _ROW_HEIGHT = 58
    _TILE_WIDTH = 10

    # Per-channel difference at or above which two pixels count as different.
    _PIXEL_THRESHOLD = 50

    # Vertical pointer offset range used for every simulated mouse move.
    _Y_JITTER = (380, 390)

    # Extracts (url, x, y) from a tile's inline style.  The quotes around the
    # URL are optional so the same pattern handles both serialisations the
    # original code special-cased (quoted for Chrome, unquoted for PhantomJS).
    _STYLE_RE = re.compile(
        r'background-image: url\("?(.*?)"?\); '
        r'background-position: (.*)px (.*)px;'
    )

    def __init__(self):
        self.agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
        self.headers = {'User-Agent': self.agent}
        self.is_display = True

    def get_browser(self):
        """Start the browser and open the site.

        Uses Chrome when ``self.is_display`` is truthy, otherwise PhantomJS
        with a custom user agent.  The driver is stored on ``self.browser``.
        """
        if self.is_display:
            self.browser = webdriver.Chrome()
        else:
            agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27"
            dcap = DesiredCapabilities.PHANTOMJS
            dcap['phantomjs.page.settings.userAgent'] = agent
            self.browser = webdriver.PhantomJS(desired_capabilities=dcap)
        self.browser.get("http://www.gsxt.gov.cn/")
        time.sleep(1)

    def exec_crack(self, text=u'中国移动', is_display=True):
        """Run the whole search + captcha flow and return the page source.

        :param text: keyword typed into the search box.
        :param is_display: True to drive a visible Chrome window, False to
            use PhantomJS.
        :return: ``page_source`` of the browser two seconds after the slider
            has been released.
        :raises RuntimeError: if the two captcha images show no difference,
            so no drag distance can be derived.
        """
        self.is_display = is_display
        self.browser = None
        try:
            self.get_browser()
            self.input_text(text=text)
            self.submit()
            self.waiting_element()
            image1 = self.get_image("//div[@class='gt_cut_bg gt_show']/div")
            image2 = self.get_image("//div[@class='gt_cut_fullbg gt_show']/div")
            offset = self.get_diff_location(image1, image2)
            if offset is None:
                raise RuntimeError(
                    "no difference found between the two captcha images")
            track_list = self.get_track(offset)
            self.drag_and_drop(track_list, offset)
            time.sleep(2)
            content = self.browser.page_source
            self.browser.delete_all_cookies()
            return content
        finally:
            # Always shut the driver down, even when a step above raised,
            # so no browser process is left behind.
            if self.browser is not None:
                self.browser.quit()

    def drag_and_drop(self, track_list, offset):
        """Hold the slider knob, replay the track, and release.

        :param track_list: x offsets produced by :meth:`get_track`.
        :param offset: total drag distance; accepted for interface
            compatibility, not used by the body.
        """
        element = self.browser.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")

        def hover(xoffset):
            # One pointer move relative to the knob with a random y offset.
            ActionChains(self.browser).move_to_element_with_offset(
                to_element=element,
                xoffset=xoffset,
                yoffset=random.randint(*self._Y_JITTER),
            ).perform()

        ActionChains(self.browser).click_and_hold(on_element=element).perform()

        # Two preliminary passes that ignore the track values and only wiggle
        # the pointer (+22, then -22), one move per track entry.
        for wiggle in (22, -22):
            for _ in track_list:
                hover(wiggle)
                time.sleep(0.01)

        # The actual drag: the first 29 moves are fast, the rest slow.
        fast_moves = 30
        for count, track in enumerate(track_list, 1):
            hover(track + 22)
            time.sleep(0.01 if count < fast_moves else 0.1)

        # Five settling moves before letting go.
        for _ in range(5):
            hover(21)
            time.sleep(0.1)
        ActionChains(self.browser).release(on_element=element).perform()

    def input_text(self, text, el_id='keyword'):
        """Click the input with id ``el_id`` and type ``text`` into it."""
        input_el = self.browser.find_element_by_id(el_id)
        input_el.click()
        input_el.send_keys(text)
        time.sleep(2)

    def submit(self, el_class='f18'):
        """Click the first element with class ``el_class`` (the submit button)."""
        submit_el = self.browser.find_element_by_class_name(el_class)
        submit_el.click()
        time.sleep(2)

    def get_image(self, img_xpath):
        """Download and reassemble the captcha image behind ``img_xpath``.

        Each matched element is one tile whose inline style carries the
        shared image URL and the tile's background position.

        :param img_xpath: XPath selecting the tile ``div`` elements.
        :return: the reassembled image from :meth:`get_merge_image`.
        :raises ValueError: if no tiles match or a tile's style cannot be
            parsed.
        """
        location_list = []
        imageurl = ''
        for tile in self.browser.find_elements_by_xpath(img_xpath):
            style = tile.get_attribute('style')
            match = self._STYLE_RE.search(style or '')
            if match is None:
                raise ValueError("unrecognised captcha tile style: %r" % (style,))
            imageurl = match.group(1)
            location_list.append({'x': int(match.group(2)),
                                  'y': int(match.group(3))})
        if not location_list:
            raise ValueError("no captcha tiles matched %r" % (img_xpath,))

        imageurl = imageurl.replace("webp", "jpg")
        # NOTE: certificate verification is disabled, as in the original code.
        with requests.session() as session:
            response = session.get(imageurl, headers=self.headers, verify=False)
        # response.content is bytes, so it needs a bytes buffer; a text
        # StringIO rejects it on Python 3.
        image_mix = BytesIO(response.content)
        return self.get_merge_image(image_mix, location_list)

    def get_merge_image(self, image_mix, location_list):
        """Rebuild the 260x116 picture from the shuffled source image.

        :param image_mix: file-like object (or path) of the shuffled image.
        :param location_list: dicts with ``x``/``y`` background positions, in
            display order.  ``y == -58`` tiles form the top row of the result
            and are cut from the lower half of the source; ``y == 0`` tiles
            form the bottom row and are cut from the upper half.
        :return: a new RGB image.
        """
        source = image.open(image_mix)
        row, tile = self._ROW_HEIGHT, self._TILE_WIDTH
        height = self._IMAGE_SIZE[1]
        upper_tiles = []
        lower_tiles = []
        for location in location_list:
            left = abs(location['x'])
            if location['y'] == -row:
                # The original cropped down to y=166, past the 116px image;
                # the overrun was only black padding, so the result is equal.
                upper_tiles.append(source.crop((left, row, left + tile, height)))
            if location['y'] == 0:
                lower_tiles.append(source.crop((left, 0, left + tile, row)))

        merged = image.new('RGB', self._IMAGE_SIZE)
        for top, tiles in ((0, upper_tiles), (row, lower_tiles)):
            x_offset = 0
            for piece in tiles:
                merged.paste(piece, (x_offset, top))
                x_offset += piece.size[0]
        return merged

    def is_similar(self, image1, image2, x, y):
        """Return True if the pixels at ``(x, y)`` differ by < 50 per RGB channel."""
        pixel1 = image1.getpixel((x, y))
        pixel2 = image2.getpixel((x, y))
        return all(
            abs(pixel1[channel] - pixel2[channel]) < self._PIXEL_THRESHOLD
            for channel in range(3)
        )

    def get_diff_location(self, image1, image2):
        """Return the x of the first column where the two images differ.

        Columns are scanned left to right, each top to bottom.

        :return: the column index, or ``None`` when no pixel differs.
        """
        width, height = self._IMAGE_SIZE
        for x in range(width):
            for y in range(height):
                if not self.is_similar(image1, image2, x, y):
                    return x
        return None

    def get_track(self, offset):
        """Split ``offset`` into a list of small steps that sum to ``offset``.

        The first step is random in 3..8, later ones random in 2..3, for as
        long as at least 5 would remain; the remainder is emitted as 1s.
        """
        track = []
        step = random.randint(3, 8)
        while offset - step >= 5:
            track.append(step)
            offset -= step
            step = random.randint(2, 3)
        track.extend([1] * offset)
        return track

    def waiting_element(self):
        """Wait up to 30s each for the knob and both captcha images to show."""
        for xpath in (
            "//div[@class='gt_slider_knob gt_show']",
            "//div[@class='gt_cut_bg gt_show']",
            "//div[@class='gt_cut_fullbg gt_show']",
        ):
            WebDriverWait(self.browser, 30).until(
                lambda the_driver, xpath=xpath:
                    the_driver.find_element_by_xpath(xpath).is_displayed())


if __name__ == "__main__":
    crack = CrackSlide()
    content = crack.exec_crack(is_display=True)

# ================================================
# FILE: requirements.txt
# ================================================
# appdirs==1.4.3
# olefile==0.44
# packaging==16.8
# Pillow==4.0.0
# pyparsing==2.2.0
# requests==2.13.0
# selenium==3.3.1
# six==1.10.0