Repository: ym2011/PEST
Branch: master
Commit: 15e4d1c42192
Files: 898
Total size: 18.8 MB
Directory structure:
gitextract_q3uml0q4/
├── .gitignore
├── BruteXSS/
│ ├── License.txt
│ ├── README.md
│ ├── brutexss.py
│ ├── colorama/
│ │ ├── __init__.py
│ │ ├── ansi.py
│ │ ├── ansitowin32.py
│ │ ├── initialise.py
│ │ ├── win32.py
│ │ └── winterm.py
│ ├── mechanize/
│ │ ├── __init__.py
│ │ ├── _auth.py
│ │ ├── _beautifulsoup.py
│ │ ├── _clientcookie.py
│ │ ├── _debug.py
│ │ ├── _firefox3cookiejar.py
│ │ ├── _form.py
│ │ ├── _gzip.py
│ │ ├── _headersutil.py
│ │ ├── _html.py
│ │ ├── _http.py
│ │ ├── _lwpcookiejar.py
│ │ ├── _markupbase.py
│ │ ├── _mechanize.py
│ │ ├── _mozillacookiejar.py
│ │ ├── _msiecookiejar.py
│ │ ├── _opener.py
│ │ ├── _pullparser.py
│ │ ├── _request.py
│ │ ├── _response.py
│ │ ├── _rfc3986.py
│ │ ├── _sgmllib_copy.py
│ │ ├── _sockettimeout.py
│ │ ├── _testcase.py
│ │ ├── _urllib2.py
│ │ ├── _urllib2_fork.py
│ │ ├── _useragent.py
│ │ ├── _util.py
│ │ └── _version.py
│ ├── wordlist-huge.txt
│ ├── wordlist-medium.txt
│ ├── wordlist-small.txt
│ └── wordlist.txt
├── Burpsuite/
│ ├── BurpSuite_2021.bat
│ ├── README.MD
│ ├── Run Burp Suite_v1.7.37.bat
│ ├── download-link.txt
│ └── xray_run_with_burp.bat
├── DBScanner/
│ ├── .gitignore
│ ├── LICENSE
│ ├── README.md
│ ├── about oracle.txt
│ └── dbscan.py
├── Github_Leak/
│ ├── GitHack-master/
│ │ ├── GitHack.py
│ │ ├── README.md
│ │ └── lib/
│ │ ├── __init__.py
│ │ └── parser.py
│ └── Github-Hunter-master/
│ ├── GithubHunter.py
│ ├── README.md
│ ├── info.ini.example
│ └── requirements.txt
├── Linux_scripts/
│ ├── recover_ss.sh
│ ├── run_armitage.sh
│ ├── set_proxy.sh
│ └── siege_batchaccess.sh
├── README.md
├── Snorby_scripts/
│ ├── README.MD
│ ├── setup.sh
│ ├── start_ids.sh
│ └── stop_ids.sh
├── User Agents/
│ ├── LICENSE
│ ├── README.md
│ ├── SwitchyOmega/
│ │ ├── OmegaOptions.bak
│ │ └── SwitchyOmega_v2.2.11.crx
│ ├── csv/
│ │ ├── android-browser.csv
│ │ ├── chrome.csv
│ │ ├── firefox.csv
│ │ ├── internet-explorer.csv
│ │ ├── opera.csv
│ │ ├── safari.csv
│ │ ├── techpatterns_com_useragentswitcher.csv
│ │ └── ua_org_allagents.csv
│ ├── json/
│ │ ├── android-browser.json
│ │ ├── chrome.json
│ │ ├── firefox.json
│ │ ├── internet-explorer.json
│ │ ├── opera.json
│ │ ├── safari.json
│ │ ├── techpatterns_com_useragentswitcher.json
│ │ └── ua_org_allagents.json
│ ├── requirements.txt
│ ├── user agent example.txt
│ └── useragents.py
├── Weak_Password/
│ ├── Bruteforce/
│ │ ├── README.md
│ │ ├── bruteforce.py
│ │ ├── comm/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ ├── portscan.py
│ │ │ └── printers.py
│ │ ├── conf/
│ │ │ ├── .svn/
│ │ │ │ ├── all-wcprops
│ │ │ │ ├── entries
│ │ │ │ └── text-base/
│ │ │ │ └── config.txt.svn-base
│ │ │ ├── ftp.conf
│ │ │ ├── ldapd.conf
│ │ │ ├── mongodb.conf
│ │ │ ├── mssql.conf
│ │ │ ├── mysql.conf
│ │ │ ├── pop3.conf
│ │ │ ├── postgres.conf
│ │ │ ├── signs.conf
│ │ │ ├── smb.conf
│ │ │ ├── snmp.conf
│ │ │ ├── ssh.conf
│ │ │ ├── tomcat.conf
│ │ │ ├── vnc.conf
│ │ │ └── web.conf
│ │ ├── factorys/
│ │ │ ├── __init__.py
│ │ │ └── pluginFactory.py
│ │ ├── plugins/
│ │ │ ├── __init__.py
│ │ │ ├── ftp.py
│ │ │ ├── ldapd.py
│ │ │ ├── mongodb.py
│ │ │ ├── mssql.py
│ │ │ ├── mysql.py
│ │ │ ├── pop3.py
│ │ │ ├── postgres.py
│ │ │ ├── redisexp.py
│ │ │ ├── rsync.py
│ │ │ ├── rsynclib.py
│ │ │ ├── smb.py
│ │ │ ├── snmp.py
│ │ │ ├── ssh.py
│ │ │ ├── ssltest.py
│ │ │ ├── vnc.py
│ │ │ ├── vnclib.py
│ │ │ └── web.py
│ │ └── requirements.txt
│ ├── Fuxi-Scanner/
│ │ ├── .gitattributes
│ │ ├── .gitignore
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── doc/
│ │ │ ├── INSTALL.en.md
│ │ │ ├── INSTALL.zh.md
│ │ │ └── README.zh.md
│ │ ├── fuxi/
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── static/
│ │ │ │ ├── css/
│ │ │ │ │ ├── dropzone.css
│ │ │ │ │ ├── duallistbox/
│ │ │ │ │ │ ├── bootstrap-duallistbox.css
│ │ │ │ │ │ └── bootstrap-multiselect.css
│ │ │ │ │ ├── font-awesome-4.7.0/
│ │ │ │ │ │ ├── css/
│ │ │ │ │ │ │ └── font-awesome.css
│ │ │ │ │ │ └── fonts/
│ │ │ │ │ │ └── FontAwesome.otf
│ │ │ │ │ └── main.css
│ │ │ │ ├── download/
│ │ │ │ │ └── test
│ │ │ │ └── js/
│ │ │ │ ├── bootstrap/
│ │ │ │ │ └── bootstrap-multiselect.js
│ │ │ │ ├── jquery/
│ │ │ │ │ └── jquery.bootstrap-duallistbox.js
│ │ │ │ ├── main.js
│ │ │ │ ├── plugins/
│ │ │ │ │ ├── Chart.js
│ │ │ │ │ ├── FeedEk.js
│ │ │ │ │ ├── bootstrap-datetimepicker.js
│ │ │ │ │ ├── dropzone.js
│ │ │ │ │ └── laydate/
│ │ │ │ │ ├── laydate.js
│ │ │ │ │ └── theme/
│ │ │ │ │ └── default/
│ │ │ │ │ └── laydate.css
│ │ │ │ └── server/
│ │ │ │ ├── acunetix-scanner.js
│ │ │ │ ├── asset-management.js
│ │ │ │ ├── asset-services-list.js
│ │ │ │ ├── auth-tester-tasks.js
│ │ │ │ ├── checkbox.js
│ │ │ │ ├── dashboard.js
│ │ │ │ ├── new-asset.js
│ │ │ │ ├── new-auth-tester.js
│ │ │ │ ├── new-scan.js
│ │ │ │ ├── plugin-management.js
│ │ │ │ ├── port-scanner.js
│ │ │ │ ├── search.js
│ │ │ │ ├── settings.js
│ │ │ │ ├── subdomain-brute.js
│ │ │ │ ├── subdomain-list.js
│ │ │ │ ├── task-management.js
│ │ │ │ ├── update.js
│ │ │ │ ├── vulnerability.js
│ │ │ │ └── week-passwd-list.js
│ │ │ ├── templates/
│ │ │ │ ├── 404.html
│ │ │ │ ├── 500.html
│ │ │ │ ├── acunetix-scanner.html
│ │ │ │ ├── acunetix-tasks.html
│ │ │ │ ├── advanced-option.html
│ │ │ │ ├── asset-management.html
│ │ │ │ ├── asset-services.html
│ │ │ │ ├── auth-tester-tasks.html
│ │ │ │ ├── base.html
│ │ │ │ ├── dashboard.html
│ │ │ │ ├── index.html
│ │ │ │ ├── login.html
│ │ │ │ ├── new-asset.html
│ │ │ │ ├── new-auth-tester.html
│ │ │ │ ├── new-scan.html
│ │ │ │ ├── plugin-management.html
│ │ │ │ ├── port-scanner.html
│ │ │ │ ├── search.html
│ │ │ │ ├── subdomain-brute.html
│ │ │ │ ├── subdomain-list.html
│ │ │ │ ├── system-config.html
│ │ │ │ ├── task-management.html
│ │ │ │ ├── vulnerability.html
│ │ │ │ └── week-passwd-list.html
│ │ │ └── views/
│ │ │ ├── __init__.py
│ │ │ ├── acunetix_scanner.py
│ │ │ ├── asset_management.py
│ │ │ ├── auth_tester.py
│ │ │ ├── authenticate.py
│ │ │ ├── dashboard.py
│ │ │ ├── index.py
│ │ │ ├── lib/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── get_title.py
│ │ │ │ ├── mongo_db.py
│ │ │ │ └── parse_target.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── acunetix_scanner/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── awvs_api.py
│ │ │ │ ├── auth_tester/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── auth_scanner.py
│ │ │ │ │ └── hydra_plugin.py
│ │ │ │ ├── discovery/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── asset_discovery.py
│ │ │ │ ├── port_scanner/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── nmap_scanner.py
│ │ │ │ ├── scanner/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── parse_plugin.py
│ │ │ │ │ └── poc_scanner.py
│ │ │ │ └── subdomain/
│ │ │ │ ├── __init__.py
│ │ │ │ └── domain_brute.py
│ │ │ ├── plugin_management.py
│ │ │ ├── port_scanner.py
│ │ │ ├── settings.py
│ │ │ ├── subdomain_brute.py
│ │ │ └── vul_scanner.py
│ │ ├── fuxi_scanner.py
│ │ ├── migration/
│ │ │ ├── DataModels
│ │ │ └── start.py
│ │ ├── requirements.txt
│ │ └── run.sh
│ ├── Scrack/
│ │ ├── README.md
│ │ └── Scrack.py
│ └── fenghuangscanner/
│ ├── README.md
│ ├── comm/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── portscan.py
│ │ └── printers.py
│ ├── conf/
│ │ ├── .svn/
│ │ │ ├── all-wcprops
│ │ │ ├── entries
│ │ │ └── text-base/
│ │ │ └── config.txt.svn-base
│ │ ├── ftp.conf
│ │ ├── ldapd.conf
│ │ ├── mongodb.conf
│ │ ├── mssql.conf
│ │ ├── mysql.conf
│ │ ├── pop3.conf
│ │ ├── postgres.conf
│ │ ├── signs.conf
│ │ ├── smb.conf
│ │ ├── snmp.conf
│ │ ├── ssh.conf
│ │ ├── tomcat.conf
│ │ ├── vnc.conf
│ │ └── web.conf
│ ├── factorys/
│ │ ├── __init__.py
│ │ └── pluginFactory.py
│ ├── main.py
│ ├── plugins/
│ │ ├── __init__.py
│ │ ├── ftp.py
│ │ ├── ldapd.py
│ │ ├── mongodb.py
│ │ ├── mssql.py
│ │ ├── mysql.py
│ │ ├── pop3.py
│ │ ├── postgres.py
│ │ ├── redisexp.py
│ │ ├── rsync.py
│ │ ├── rsynclib.py
│ │ ├── smb.py
│ │ ├── snmp.py
│ │ ├── ssh.py
│ │ ├── ssltest.py
│ │ ├── vnc.py
│ │ ├── vnclib.py
│ │ └── web.py
│ └── requirements.txt
├── Windows_scripts/
│ ├── AppScan_batch_scan/
│ │ ├── AppScan_batch.bat
│ │ ├── READ.ME
│ │ ├── batch.bat
│ │ └── urltoscan0.bat
│ ├── Armitage-FAQ.txt
│ ├── FolderHide.bat
│ ├── FolderUnhide.bat
│ ├── README.MD
│ ├── Windows-security-check/
│ │ ├── Windows-security-check.README
│ │ └── Windows-security-check.bat
│ ├── check_honeyport_status.bat
│ ├── init.bat
│ ├── ipreverse.py
│ ├── networking.txt
│ ├── ping_check_network.bat
│ ├── safedog.py
│ ├── smsbomb.py
│ ├── start_share_v1.2.bat
│ └── stop_share_v1.4.bat
├── antSword/
│ ├── .github/
│ │ └── ISSUE_TEMPLATE.md
│ ├── .gitignore
│ ├── CHANGELOG.md
│ ├── LICENSE
│ ├── README.md
│ ├── README_CN.md
│ ├── app.js
│ ├── modules/
│ │ ├── cache.js
│ │ ├── config.js
│ │ ├── database.js
│ │ ├── logger.js
│ │ ├── menubar.js
│ │ ├── plugStore.js
│ │ ├── request.js
│ │ └── update.js
│ ├── package.json
│ ├── shells/
│ │ ├── README.md
│ │ ├── asp.net_custom_script_for_odbc.aspx
│ │ ├── asp.net_eval_script.aspx
│ │ ├── asp_eval_xxxx_script.asp
│ │ ├── jsp_custom_script_for_mysql.jsp
│ │ ├── jspx_custom_script_for_mysql.jspx
│ │ ├── php_assert_script.php
│ │ ├── php_create_function_script.php
│ │ └── php_custom_script_for_mysql.php
│ ├── source/
│ │ ├── app.entry.js
│ │ ├── base/
│ │ │ ├── cachemanager.js
│ │ │ ├── encodes.js
│ │ │ └── menubar.js
│ │ ├── core/
│ │ │ ├── README.md
│ │ │ ├── asp/
│ │ │ │ ├── encoder/
│ │ │ │ │ └── xxxxdog.js
│ │ │ │ ├── index.js
│ │ │ │ └── template/
│ │ │ │ ├── base.js
│ │ │ │ ├── command.js
│ │ │ │ ├── database/
│ │ │ │ │ ├── access.js
│ │ │ │ │ ├── default.js
│ │ │ │ │ ├── dsn.js
│ │ │ │ │ ├── microsoft_jet_oledb_4_0.js
│ │ │ │ │ ├── mysql.js
│ │ │ │ │ ├── oracle.js
│ │ │ │ │ ├── sqloledb_1.js
│ │ │ │ │ ├── sqloledb_1_sspi.js
│ │ │ │ │ └── sqlserver.js
│ │ │ │ └── filemanager.js
│ │ │ ├── aspx/
│ │ │ │ ├── encoder/
│ │ │ │ │ ├── base64.js
│ │ │ │ │ └── hex.js
│ │ │ │ ├── index.js
│ │ │ │ └── template/
│ │ │ │ ├── base.js
│ │ │ │ ├── command.js
│ │ │ │ ├── database/
│ │ │ │ │ ├── access.js
│ │ │ │ │ ├── default.js
│ │ │ │ │ ├── dsn.js
│ │ │ │ │ ├── microsoft_jet_oledb_4_0.js
│ │ │ │ │ ├── mysql.js
│ │ │ │ │ ├── oracle.js
│ │ │ │ │ ├── sqloledb_1.js
│ │ │ │ │ ├── sqloledb_1_sspi.js
│ │ │ │ │ └── sqlserver.js
│ │ │ │ └── filemanager.js
│ │ │ ├── base.js
│ │ │ ├── custom/
│ │ │ │ ├── encoder/
│ │ │ │ │ ├── base64.js
│ │ │ │ │ └── hex.js
│ │ │ │ ├── index.js
│ │ │ │ └── template/
│ │ │ │ ├── base.js
│ │ │ │ ├── command.js
│ │ │ │ ├── database/
│ │ │ │ │ ├── default.js
│ │ │ │ │ ├── mysql.js
│ │ │ │ │ ├── oracle.js
│ │ │ │ │ └── sqlserver.js
│ │ │ │ └── filemanager.js
│ │ │ ├── index.js
│ │ │ └── php/
│ │ │ ├── encoder/
│ │ │ │ ├── base64.js
│ │ │ │ ├── chr.js
│ │ │ │ ├── chr16.js
│ │ │ │ └── rot13.js
│ │ │ ├── index.js
│ │ │ └── template/
│ │ │ ├── base.js
│ │ │ ├── command.js
│ │ │ ├── database/
│ │ │ │ ├── informix.js
│ │ │ │ ├── mssql.js
│ │ │ │ ├── mysql.js
│ │ │ │ ├── mysqli.js
│ │ │ │ └── oracle.js
│ │ │ └── filemanager.js
│ │ ├── language/
│ │ │ ├── en.js
│ │ │ ├── index.js
│ │ │ └── zh.js
│ │ ├── load.entry.js
│ │ ├── modules/
│ │ │ ├── database/
│ │ │ │ ├── asp/
│ │ │ │ │ └── index.js
│ │ │ │ ├── aspx/
│ │ │ │ │ └── index.js
│ │ │ │ ├── custom/
│ │ │ │ │ └── index.js
│ │ │ │ ├── index.js
│ │ │ │ └── php/
│ │ │ │ └── index.js
│ │ │ ├── filemanager/
│ │ │ │ ├── files.js
│ │ │ │ ├── folder.js
│ │ │ │ ├── index.js
│ │ │ │ └── tasks.js
│ │ │ ├── plugin/
│ │ │ │ └── index.js
│ │ │ ├── settings/
│ │ │ │ ├── about.js
│ │ │ │ ├── aproxy.js
│ │ │ │ ├── display.js
│ │ │ │ ├── encoders.js
│ │ │ │ ├── index.js
│ │ │ │ ├── language.js
│ │ │ │ └── update.js
│ │ │ ├── shellmanager/
│ │ │ │ ├── category/
│ │ │ │ │ ├── index.js
│ │ │ │ │ ├── sidebar.js
│ │ │ │ │ └── toolbar.js
│ │ │ │ ├── data.js
│ │ │ │ ├── index.js
│ │ │ │ └── list/
│ │ │ │ ├── contextmenu.js
│ │ │ │ ├── form.js
│ │ │ │ ├── grid.js
│ │ │ │ └── index.js
│ │ │ ├── terminal/
│ │ │ │ └── index.js
│ │ │ └── viewsite/
│ │ │ ├── README.md
│ │ │ ├── cookiemgr.js
│ │ │ └── index.js
│ │ └── ui/
│ │ ├── README.md
│ │ ├── tabbar.js
│ │ └── window.js
│ ├── static/
│ │ ├── css/
│ │ │ └── index.css
│ │ └── libs/
│ │ ├── ace/
│ │ │ ├── ace.js
│ │ │ ├── ext-beautify.js
│ │ │ ├── ext-chromevox.js
│ │ │ ├── ext-elastic_tabstops_lite.js
│ │ │ ├── ext-emmet.js
│ │ │ ├── ext-error_marker.js
│ │ │ ├── ext-keybinding_menu.js
│ │ │ ├── ext-language_tools.js
│ │ │ ├── ext-linking.js
│ │ │ ├── ext-modelist.js
│ │ │ ├── ext-old_ie.js
│ │ │ ├── ext-searchbox.js
│ │ │ ├── ext-settings_menu.js
│ │ │ ├── ext-spellcheck.js
│ │ │ ├── ext-split.js
│ │ │ ├── ext-static_highlight.js
│ │ │ ├── ext-statusbar.js
│ │ │ ├── ext-textarea.js
│ │ │ ├── ext-themelist.js
│ │ │ ├── ext-whitespace.js
│ │ │ ├── keybinding-emacs.js
│ │ │ ├── keybinding-vim.js
│ │ │ ├── mode-abap.js
│ │ │ ├── mode-abc.js
│ │ │ ├── mode-actionscript.js
│ │ │ ├── mode-ada.js
│ │ │ ├── mode-apache_conf.js
│ │ │ ├── mode-applescript.js
│ │ │ ├── mode-asciidoc.js
│ │ │ ├── mode-assembly_x86.js
│ │ │ ├── mode-autohotkey.js
│ │ │ ├── mode-batchfile.js
│ │ │ ├── mode-c9search.js
│ │ │ ├── mode-c_cpp.js
│ │ │ ├── mode-cirru.js
│ │ │ ├── mode-clojure.js
│ │ │ ├── mode-cobol.js
│ │ │ ├── mode-coffee.js
│ │ │ ├── mode-coldfusion.js
│ │ │ ├── mode-csharp.js
│ │ │ ├── mode-css.js
│ │ │ ├── mode-curly.js
│ │ │ ├── mode-d.js
│ │ │ ├── mode-dart.js
│ │ │ ├── mode-diff.js
│ │ │ ├── mode-django.js
│ │ │ ├── mode-dockerfile.js
│ │ │ ├── mode-dot.js
│ │ │ ├── mode-eiffel.js
│ │ │ ├── mode-ejs.js
│ │ │ ├── mode-elixir.js
│ │ │ ├── mode-elm.js
│ │ │ ├── mode-erlang.js
│ │ │ ├── mode-forth.js
│ │ │ ├── mode-ftl.js
│ │ │ ├── mode-gcode.js
│ │ │ ├── mode-gherkin.js
│ │ │ ├── mode-gitignore.js
│ │ │ ├── mode-glsl.js
│ │ │ ├── mode-golang.js
│ │ │ ├── mode-groovy.js
│ │ │ ├── mode-haml.js
│ │ │ ├── mode-handlebars.js
│ │ │ ├── mode-haskell.js
│ │ │ ├── mode-haxe.js
│ │ │ ├── mode-html.js
│ │ │ ├── mode-html_ruby.js
│ │ │ ├── mode-ini.js
│ │ │ ├── mode-io.js
│ │ │ ├── mode-jack.js
│ │ │ ├── mode-jade.js
│ │ │ ├── mode-java.js
│ │ │ ├── mode-javascript.js
│ │ │ ├── mode-json.js
│ │ │ ├── mode-jsoniq.js
│ │ │ ├── mode-jsp.js
│ │ │ ├── mode-jsx.js
│ │ │ ├── mode-julia.js
│ │ │ ├── mode-latex.js
│ │ │ ├── mode-lean.js
│ │ │ ├── mode-less.js
│ │ │ ├── mode-liquid.js
│ │ │ ├── mode-lisp.js
│ │ │ ├── mode-live_script.js
│ │ │ ├── mode-livescript.js
│ │ │ ├── mode-logiql.js
│ │ │ ├── mode-lsl.js
│ │ │ ├── mode-lua.js
│ │ │ ├── mode-luapage.js
│ │ │ ├── mode-lucene.js
│ │ │ ├── mode-makefile.js
│ │ │ ├── mode-markdown.js
│ │ │ ├── mode-mask.js
│ │ │ ├── mode-matlab.js
│ │ │ ├── mode-maze.js
│ │ │ ├── mode-mel.js
│ │ │ ├── mode-mips_assembler.js
│ │ │ ├── mode-mipsassembler.js
│ │ │ ├── mode-mushcode.js
│ │ │ ├── mode-mysql.js
│ │ │ ├── mode-nix.js
│ │ │ ├── mode-objectivec.js
│ │ │ ├── mode-ocaml.js
│ │ │ ├── mode-pascal.js
│ │ │ ├── mode-perl.js
│ │ │ ├── mode-pgsql.js
│ │ │ ├── mode-php.js
│ │ │ ├── mode-plain_text.js
│ │ │ ├── mode-powershell.js
│ │ │ ├── mode-praat.js
│ │ │ ├── mode-prolog.js
│ │ │ ├── mode-properties.js
│ │ │ ├── mode-protobuf.js
│ │ │ ├── mode-python.js
│ │ │ ├── mode-r.js
│ │ │ ├── mode-rdoc.js
│ │ │ ├── mode-rhtml.js
│ │ │ ├── mode-ruby.js
│ │ │ ├── mode-rust.js
│ │ │ ├── mode-sass.js
│ │ │ ├── mode-scad.js
│ │ │ ├── mode-scala.js
│ │ │ ├── mode-scheme.js
│ │ │ ├── mode-scss.js
│ │ │ ├── mode-sh.js
│ │ │ ├── mode-sjs.js
│ │ │ ├── mode-smarty.js
│ │ │ ├── mode-snippets.js
│ │ │ ├── mode-soy_template.js
│ │ │ ├── mode-space.js
│ │ │ ├── mode-sql.js
│ │ │ ├── mode-sqlserver.js
│ │ │ ├── mode-stylus.js
│ │ │ ├── mode-svg.js
│ │ │ ├── mode-tcl.js
│ │ │ ├── mode-tex.js
│ │ │ ├── mode-text.js
│ │ │ ├── mode-textile.js
│ │ │ ├── mode-toml.js
│ │ │ ├── mode-twig.js
│ │ │ ├── mode-typescript.js
│ │ │ ├── mode-vala.js
│ │ │ ├── mode-vbscript.js
│ │ │ ├── mode-velocity.js
│ │ │ ├── mode-verilog.js
│ │ │ ├── mode-vhdl.js
│ │ │ ├── mode-xml.js
│ │ │ ├── mode-xquery.js
│ │ │ ├── mode-yaml.js
│ │ │ ├── snippets/
│ │ │ │ ├── abap.js
│ │ │ │ ├── abc.js
│ │ │ │ ├── actionscript.js
│ │ │ │ ├── ada.js
│ │ │ │ ├── apache_conf.js
│ │ │ │ ├── applescript.js
│ │ │ │ ├── asciidoc.js
│ │ │ │ ├── assembly_x86.js
│ │ │ │ ├── autohotkey.js
│ │ │ │ ├── batchfile.js
│ │ │ │ ├── c9search.js
│ │ │ │ ├── c_cpp.js
│ │ │ │ ├── cirru.js
│ │ │ │ ├── clojure.js
│ │ │ │ ├── cobol.js
│ │ │ │ ├── coffee.js
│ │ │ │ ├── coldfusion.js
│ │ │ │ ├── csharp.js
│ │ │ │ ├── css.js
│ │ │ │ ├── curly.js
│ │ │ │ ├── d.js
│ │ │ │ ├── dart.js
│ │ │ │ ├── diff.js
│ │ │ │ ├── django.js
│ │ │ │ ├── dockerfile.js
│ │ │ │ ├── dot.js
│ │ │ │ ├── eiffel.js
│ │ │ │ ├── ejs.js
│ │ │ │ ├── elixir.js
│ │ │ │ ├── elm.js
│ │ │ │ ├── erlang.js
│ │ │ │ ├── forth.js
│ │ │ │ ├── ftl.js
│ │ │ │ ├── gcode.js
│ │ │ │ ├── gherkin.js
│ │ │ │ ├── gitignore.js
│ │ │ │ ├── glsl.js
│ │ │ │ ├── golang.js
│ │ │ │ ├── groovy.js
│ │ │ │ ├── haml.js
│ │ │ │ ├── handlebars.js
│ │ │ │ ├── haskell.js
│ │ │ │ ├── haxe.js
│ │ │ │ ├── html.js
│ │ │ │ ├── html_ruby.js
│ │ │ │ ├── ini.js
│ │ │ │ ├── io.js
│ │ │ │ ├── jack.js
│ │ │ │ ├── jade.js
│ │ │ │ ├── java.js
│ │ │ │ ├── javascript.js
│ │ │ │ ├── json.js
│ │ │ │ ├── jsoniq.js
│ │ │ │ ├── jsp.js
│ │ │ │ ├── jsx.js
│ │ │ │ ├── julia.js
│ │ │ │ ├── latex.js
│ │ │ │ ├── lean.js
│ │ │ │ ├── less.js
│ │ │ │ ├── liquid.js
│ │ │ │ ├── lisp.js
│ │ │ │ ├── live_script.js
│ │ │ │ ├── livescript.js
│ │ │ │ ├── logiql.js
│ │ │ │ ├── lsl.js
│ │ │ │ ├── lua.js
│ │ │ │ ├── luapage.js
│ │ │ │ ├── lucene.js
│ │ │ │ ├── makefile.js
│ │ │ │ ├── markdown.js
│ │ │ │ ├── mask.js
│ │ │ │ ├── matlab.js
│ │ │ │ ├── maze.js
│ │ │ │ ├── mel.js
│ │ │ │ ├── mips_assembler.js
│ │ │ │ ├── mipsassembler.js
│ │ │ │ ├── mushcode.js
│ │ │ │ ├── mysql.js
│ │ │ │ ├── nix.js
│ │ │ │ ├── objectivec.js
│ │ │ │ ├── ocaml.js
│ │ │ │ ├── pascal.js
│ │ │ │ ├── perl.js
│ │ │ │ ├── pgsql.js
│ │ │ │ ├── php.js
│ │ │ │ ├── plain_text.js
│ │ │ │ ├── powershell.js
│ │ │ │ ├── praat.js
│ │ │ │ ├── prolog.js
│ │ │ │ ├── properties.js
│ │ │ │ ├── protobuf.js
│ │ │ │ ├── python.js
│ │ │ │ ├── r.js
│ │ │ │ ├── rdoc.js
│ │ │ │ ├── rhtml.js
│ │ │ │ ├── ruby.js
│ │ │ │ ├── rust.js
│ │ │ │ ├── sass.js
│ │ │ │ ├── scad.js
│ │ │ │ ├── scala.js
│ │ │ │ ├── scheme.js
│ │ │ │ ├── scss.js
│ │ │ │ ├── sh.js
│ │ │ │ ├── sjs.js
│ │ │ │ ├── smarty.js
│ │ │ │ ├── snippets.js
│ │ │ │ ├── soy_template.js
│ │ │ │ ├── space.js
│ │ │ │ ├── sql.js
│ │ │ │ ├── sqlserver.js
│ │ │ │ ├── stylus.js
│ │ │ │ ├── svg.js
│ │ │ │ ├── tcl.js
│ │ │ │ ├── tex.js
│ │ │ │ ├── text.js
│ │ │ │ ├── textile.js
│ │ │ │ ├── toml.js
│ │ │ │ ├── twig.js
│ │ │ │ ├── typescript.js
│ │ │ │ ├── vala.js
│ │ │ │ ├── vbscript.js
│ │ │ │ ├── velocity.js
│ │ │ │ ├── verilog.js
│ │ │ │ ├── vhdl.js
│ │ │ │ ├── xml.js
│ │ │ │ ├── xquery.js
│ │ │ │ └── yaml.js
│ │ │ ├── theme-ambiance.js
│ │ │ ├── theme-chaos.js
│ │ │ ├── theme-chrome.js
│ │ │ ├── theme-clouds.js
│ │ │ ├── theme-clouds_midnight.js
│ │ │ ├── theme-cobalt.js
│ │ │ ├── theme-crimson_editor.js
│ │ │ ├── theme-dawn.js
│ │ │ ├── theme-dreamweaver.js
│ │ │ ├── theme-eclipse.js
│ │ │ ├── theme-github.js
│ │ │ ├── theme-idle_fingers.js
│ │ │ ├── theme-iplastic.js
│ │ │ ├── theme-katzenmilch.js
│ │ │ ├── theme-kr_theme.js
│ │ │ ├── theme-kuroir.js
│ │ │ ├── theme-merbivore.js
│ │ │ ├── theme-merbivore_soft.js
│ │ │ ├── theme-mono_industrial.js
│ │ │ ├── theme-monokai.js
│ │ │ ├── theme-pastel_on_dark.js
│ │ │ ├── theme-solarized_dark.js
│ │ │ ├── theme-solarized_light.js
│ │ │ ├── theme-sqlserver.js
│ │ │ ├── theme-terminal.js
│ │ │ ├── theme-textmate.js
│ │ │ ├── theme-tomorrow.js
│ │ │ ├── theme-tomorrow_night.js
│ │ │ ├── theme-tomorrow_night_blue.js
│ │ │ ├── theme-tomorrow_night_bright.js
│ │ │ ├── theme-tomorrow_night_eighties.js
│ │ │ ├── theme-twilight.js
│ │ │ ├── theme-vibrant_ink.js
│ │ │ ├── theme-xcode.js
│ │ │ ├── worker-coffee.js
│ │ │ ├── worker-css.js
│ │ │ ├── worker-html.js
│ │ │ ├── worker-javascript.js
│ │ │ ├── worker-json.js
│ │ │ ├── worker-lua.js
│ │ │ ├── worker-php.js
│ │ │ ├── worker-xml.js
│ │ │ └── worker-xquery.js
│ │ ├── bmenu/
│ │ │ ├── bmenu.css
│ │ │ └── bmenu.js
│ │ ├── dhtmlx/
│ │ │ ├── codebase/
│ │ │ │ ├── dhtmlx.css
│ │ │ │ ├── dhtmlx.js
│ │ │ │ ├── dhtmlx_deprecated.js
│ │ │ │ ├── ext/
│ │ │ │ │ ├── dhxupload.xap
│ │ │ │ │ ├── swfobject.js
│ │ │ │ │ └── uploader.swf
│ │ │ │ ├── imgs/
│ │ │ │ │ └── dhxgrid_skyblue/
│ │ │ │ │ └── tree/
│ │ │ │ │ └── Control panel_files/
│ │ │ │ │ ├── category(1).js
│ │ │ │ │ ├── category.js
│ │ │ │ │ ├── countdata.js
│ │ │ │ │ ├── dhtmlx_pro.js
│ │ │ │ │ ├── editor.js
│ │ │ │ │ ├── hostdata.js
│ │ │ │ │ ├── index(1).js
│ │ │ │ │ ├── index(2).js
│ │ │ │ │ ├── index.js
│ │ │ │ │ ├── logger.js
│ │ │ │ │ ├── main.js
│ │ │ │ │ └── require.js
│ │ │ │ └── thirdparty/
│ │ │ │ └── excanvas/
│ │ │ │ ├── AUTHORS
│ │ │ │ ├── COPYING
│ │ │ │ ├── README
│ │ │ │ └── excanvas.js
│ │ │ └── skins/
│ │ │ └── mytheme/
│ │ │ ├── dhtmlx.css
│ │ │ ├── imgs/
│ │ │ │ └── dhxvault_skyblue/
│ │ │ │ └── icons_licence.txt
│ │ │ └── readme.txt
│ │ ├── font-awesome/
│ │ │ └── fonts/
│ │ │ └── FontAwesome.otf
│ │ ├── jquery/
│ │ │ └── jquery.js
│ │ ├── laydate/
│ │ │ ├── README.md
│ │ │ ├── laydate.dev.js
│ │ │ ├── laydate.js
│ │ │ ├── need/
│ │ │ │ └── laydate.css
│ │ │ ├── skins/
│ │ │ │ ├── dahong/
│ │ │ │ │ └── laydate.css
│ │ │ │ ├── danlan/
│ │ │ │ │ └── laydate.css
│ │ │ │ ├── default/
│ │ │ │ │ └── laydate.css
│ │ │ │ ├── molv/
│ │ │ │ │ └── laydate.css
│ │ │ │ ├── qianhuang/
│ │ │ │ │ └── laydate.css
│ │ │ │ ├── yahui/
│ │ │ │ │ └── laydate.css
│ │ │ │ └── yalan/
│ │ │ │ └── laydate.css
│ │ │ └── test/
│ │ │ ├── demo1.html
│ │ │ ├── demo2.html
│ │ │ └── demo2.js
│ │ ├── layer/
│ │ │ ├── CHANGELOG.md
│ │ │ ├── GruntFile.js
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── bower.json
│ │ │ ├── extend/
│ │ │ │ └── layer.ext.js
│ │ │ ├── layer.js
│ │ │ ├── mobile/
│ │ │ │ ├── README.md
│ │ │ │ ├── layer.js
│ │ │ │ └── need/
│ │ │ │ └── layer.css
│ │ │ ├── package.json
│ │ │ ├── skin/
│ │ │ │ ├── layer.css
│ │ │ │ └── layer.ext.css
│ │ │ ├── src/
│ │ │ │ ├── README.md
│ │ │ │ ├── extend/
│ │ │ │ │ └── layer.ext.js
│ │ │ │ ├── layer.js
│ │ │ │ ├── mobile/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── layer.js
│ │ │ │ │ └── need/
│ │ │ │ │ └── layer.css
│ │ │ │ └── skin/
│ │ │ │ ├── layer.css
│ │ │ │ └── layer.ext.css
│ │ │ └── test/
│ │ │ └── demo.html
│ │ ├── react/
│ │ │ ├── react-dom.js
│ │ │ └── react.js
│ │ ├── semanticui/
│ │ │ ├── semantic.css
│ │ │ ├── semantic.js
│ │ │ └── themes/
│ │ │ └── default/
│ │ │ └── assets/
│ │ │ └── fonts/
│ │ │ └── icons.otf
│ │ ├── terminal/
│ │ │ ├── .gitignore
│ │ │ ├── .gitmodules
│ │ │ ├── css/
│ │ │ │ ├── jquery.terminal-1.1.1.css
│ │ │ │ └── jquery.terminal.css
│ │ │ └── js/
│ │ │ ├── jquery.terminal-min-1.1.1.js
│ │ │ └── jquery.terminal-min.js
│ │ └── toastr/
│ │ └── toastr.js
│ └── views/
│ ├── index.html
│ └── plugin.html
├── rainmap-lite/
│ ├── INSTALL.md
│ ├── LICENSE
│ ├── README.md
│ ├── manage.py
│ ├── nmaper/
│ │ ├── admin/
│ │ │ ├── __init__.py
│ │ │ └── views.py
│ │ ├── apps.py
│ │ ├── fixtures/
│ │ │ └── nmapprofiles.json
│ │ ├── forms.py
│ │ ├── migrations/
│ │ │ ├── 0001_initial.py
│ │ │ ├── 0002_nmapscan_email_text.py
│ │ │ ├── 0003_nmapprofile.py
│ │ │ ├── 0004_nmapscan_status_text.py
│ │ │ ├── 0005_nmapprofile_profilename_text.py
│ │ │ ├── 0006_auto_20160108_0128.py
│ │ │ ├── 0007_nmapscan_slug_text.py
│ │ │ ├── 0008_auto_20160108_0558.py
│ │ │ ├── 0009_auto_20160108_0613.py
│ │ │ ├── 0010_auto_20160108_0650.py
│ │ │ ├── 0011_auto_20160108_0702.py
│ │ │ ├── 0012_auto_20160109_0540.py
│ │ │ └── 0013_auto_20160111_0011.py
│ │ ├── models.py
│ │ ├── static/
│ │ │ ├── admin/
│ │ │ │ └── css/
│ │ │ │ ├── base.css
│ │ │ │ └── widgets.css
│ │ │ ├── css/
│ │ │ │ ├── ie8.css
│ │ │ │ ├── ie9.css
│ │ │ │ └── main.css
│ │ │ ├── fonts/
│ │ │ │ └── FontAwesome.otf
│ │ │ ├── js/
│ │ │ │ ├── ie/
│ │ │ │ │ └── html5shiv.js
│ │ │ │ ├── main.js
│ │ │ │ └── util.js
│ │ │ ├── results/
│ │ │ │ └── README.txt
│ │ │ └── sass/
│ │ │ ├── base/
│ │ │ │ ├── _page.scss
│ │ │ │ └── _typography.scss
│ │ │ ├── components/
│ │ │ │ ├── _box.scss
│ │ │ │ ├── _button.scss
│ │ │ │ ├── _features.scss
│ │ │ │ ├── _form.scss
│ │ │ │ ├── _icon.scss
│ │ │ │ ├── _image.scss
│ │ │ │ ├── _list.scss
│ │ │ │ ├── _section.scss
│ │ │ │ ├── _split.scss
│ │ │ │ ├── _spotlights.scss
│ │ │ │ ├── _table.scss
│ │ │ │ └── _wrapper.scss
│ │ │ ├── ie8.scss
│ │ │ ├── ie9.scss
│ │ │ ├── layout/
│ │ │ │ ├── _footer.scss
│ │ │ │ ├── _header.scss
│ │ │ │ ├── _intro.scss
│ │ │ │ ├── _sidebar.scss
│ │ │ │ └── _wrapper.scss
│ │ │ ├── libs/
│ │ │ │ ├── _functions.scss
│ │ │ │ ├── _mixins.scss
│ │ │ │ ├── _skel.scss
│ │ │ │ └── _vars.scss
│ │ │ └── main.scss
│ │ ├── templates/
│ │ │ ├── admin/
│ │ │ │ ├── base_site.html
│ │ │ │ ├── index.html
│ │ │ │ └── nmaper/
│ │ │ │ ├── nmap_scan/
│ │ │ │ │ └── change_list.html
│ │ │ │ └── nmapscan/
│ │ │ │ └── change_list.html
│ │ │ ├── index.html
│ │ │ └── registration/
│ │ │ └── login.html
│ │ ├── tests.py
│ │ └── views.py
│ ├── nmaper-cronjob.py
│ └── scandere/
│ ├── settings.py
│ ├── urls.py
│ └── wsgi.py
└── 乌云技术文章/
└── readme
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.sh linguist-language=bash
================================================
FILE: BruteXSS/License.txt
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
{one line to give the program's name and a brief idea of what it does.}
Copyright (C) {year} {name of author}
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
{project} Copyright (C) {year} {fullname}
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
================================================
FILE: BruteXSS/README.md
================================================
# BruteXSS
The redistributed version is at https://github.com/ym2011/penetration/tree/master/BruteXSS
This version adds more payloads to the wordlist.
BruteXSS - Cross-Site Scripting BruteForcer
Author: [Shawar Khan](https://shawarkhan.com)
**The BruteXSS project is sponsored and supported by [Netsparker Web Application Security Scanner](https://www.netsparker.com)**
Disclaimer: I am not responsible for any damage done using this tool. This tool should only be used for educational purposes and for penetration testing.
### Compatibility:
* Windows, Linux or any device running Python 2.7
### Requirements:
* Python 2.7
* Wordlist included (wordlist.txt)
* Modules required: Colorama, Mechanize
### Modules Required:
* Colorama: https://pypi.python.org/pypi/colorama/
* Mechanize: https://pypi.python.org/pypi/mechanize/
### Description:
**BruteXSS** is a very powerful and fast Cross-Site Scripting bruteforcer used for brute-forcing parameters. BruteXSS injects multiple payloads loaded from a specified wordlist, fires them at the specified parameters, and checks whether any of the parameters is vulnerable to XSS. BruteXSS is very accurate at doing its task and there is no chance of false positives, as the scanning is very powerful. BruteXSS supports POST and GET requests, which makes it compatible with modern web applications.
### Features:
* XSS Bruteforcing
* XSS Scanning
* Supports GET/POST requests
* Custom wordlist can be included
* User-friendly UI
### Usage (GET method):
```
COMMAND: python brutexss.py
METHOD: g
URL: http://www.site.com/?parameter=value
WORDLIST: wordlist.txt
```
### Usage (POST method):
```
COMMAND: python brutexss.py
METHOD: p
URL: http://www.site.com/file.php
POST DATA: parameter=value&parameter1=value1
WORDLIST: wordlist.txt
```
### Output:
```
____ _ __ ______ ____
| __ ) _ __ _ _| |_ ___ \ \/ / ___/ ___|
| _ \| '__| | | | __/ _ \ \ /\___ \___ \
| |_) | | | |_| | || __/ / \ ___) |__) |
|____/|_| \__,_|\__\___| /_/\_\____/____/
BruteXSS - Cross-Site Scripting BruteForcer
Author: Shawar Khan - https://shawarkhan.com
Select method: [G]ET or [P]OST (G/P): p
[?] Enter URL:
[?] > http://site.com/file.php
[+] Checking if site.com is available...
[+] site.com is available! Good!
[?] Enter post data: > parameter=value&parameter1=value1
[?] Enter location of Wordlist (Press Enter to use default wordlist.txt)
[?] > wordlist.txt
[+] Using Default wordlist...
[+] Loading Payloads from specified wordlist...
[+] 25 Payloads loaded...
[+] Injecting Payloads...
[+] Testing 'parameter' parameter...
[+] 2 / 25 payloads injected...
[!] XSS Vulnerability Found!
[!] Parameter: parameter
[!] Payload: "><script>prompt(1)</script>
[+] Testing 'parameter1' parameter...
[+] 25 / 25 payloads injected...
[+] 'parameter1' parameter not vulnerable.
[+] 1 Parameter is vulnerable to XSS.
+----+--------------+----------------+
| Id | Parameters | Status |
+----+--------------+----------------+
| 0 | parameter | Vulnerable |
+----+--------------+----------------+
| 1 | parameter1 | Not Vulnerable |
+----+--------------+----------------+
```
================================================
FILE: BruteXSS/brutexss.py
================================================
#!/usr/bin/env python
#!BruteXSS
#!Cross-Site Scripting Bruteforcer
#!Author: Shawar Khan
#!Site: https://shawarkhan.com
from string import whitespace
import httplib
import urllib
import socket
import urlparse
import os
import sys
import time
from colorama import init , Style, Back,Fore
import mechanize
import httplib
# Initialise colorama (imported above) before any coloured text is printed.
init()
# ASCII-art title, credits and usage note shown when the tool starts.
# Raw literal on purpose: the art is full of backslashes, and the third art
# row ends in one. In a normal string a backslash directly before the newline
# is a line continuation, which would glue that row onto the following one.
banner = r"""
____ _ __ ______ ____
| __ ) _ __ _ _| |_ ___ \ \/ / ___/ ___|
| _ \| '__| | | | __/ _ \ \ /\___ \___ \
| |_) | | | |_| | || __/ / \ ___) |__) |
|____/|_| \__,_|\__\___| /_/\_\____/____/
BruteXSS - Cross-Site Scripting BruteForcer
Author: Shawar Khan - https://shawarkhan.com
Sponsored & Supported by Netsparker Web Application Security Scanner ( https://www.netsparker.com )
Note: Using incorrect payloads in the custom
wordlist may give you false positives so its
better to use the wordlist which is already
provided for positive results.
"""
def brutexss():
if os.name == 'nt':
os.system('cls')
else:
os.system('clear')
print banner
def again():
    """Ask whether to quit or start another scan, and act on the answer.

    The prompt is repeated until the user enters ``a`` (brutexss() is called
    to relaunch the tool, then this function returns) or ``e`` (the
    interpreter is terminated). The answer is compared case-insensitively;
    anything else prints an error and asks again.
    """
    # Loop rather than recurse so repeated bad answers cannot grow the stack.
    while True:
        inp = raw_input("[?] [E]xit or launch [A]gain? (e/a)").lower()
        if inp == 'a':
            brutexss()
            return
        if inp == 'e':
            # sys.exit() instead of the exit() helper injected by the site
            # module, which is meant for the interactive shell and is not
            # guaranteed to exist.
            sys.exit()
        print("[!] Incorrect option selected")
# Dim white colour prefix (colorama style and foreground codes concatenated).
# NOTE(review): not referenced in the visible part of this file -- confirm
# that code further down uses it.
grey = Style.DIM+Fore.WHITE
def wordlistimport(file,lst):
try:
with open(file,'r') as f: #Importing Payloads from specified wordlist.
print(Style.DIM+Fore.WHITE+"[+] Loading Payloads from specified wordlist..."+Style.RESET_ALL)
for line in f:
final = str(line.replace("\n",""))
lst.append(final)
except IOError:
print(Style.BRIGHT+Fore.RED+"[!] Wordlist not found!"+Style.RESET_ALL)
again()
def bg(p,status):
try:
b = ""
l = ""
lostatus = ""
num = []
s = len(max(p, key=len)) #list
if s < 10:
s = 10
for i in range(len(p)): num.append(i)
maxval = str(len(num)) #number
for i in range(s) : b = b + "-"
for i in range(len(maxval)):l = l + "-"
statuslen = len(max(status, key=len))
for i in range(statuslen) : lostatus = lostatus + "-"
if len(b) < 10 :
b = "----------"
if len(lostatus) < 14:
lostatus="--------------"
if len(l) < 2 :
l = "--"
los = statuslen
if los < 14:
los = 14
lenb=len(str(len(b)))
if lenb < 14:
lenb = 10
else:
lenb = 20
upb = ("+-%s-+-%s-+-%s-+")%(l,b,lostatus)
print(upb)
st0 = "Parameters"
st1 = "Status"
print("| Id | "+st0.center(s," ")+" | "+st1.center(los," ")+" |")
print(upb)
for n,i,d in zip(num,p,status):
string = (" %s | %s ")%(str(n),str(i));
lofnum = str(n).center(int(len(l))," ")
lofstr = i.center(s," ")
lofst = d.center(los," ")
if "Not Vulnerable" in lofst:
lofst = Fore.GREEN+d.center(los," ")+Style.RESET_ALL
else:
lofst = Fore.RED+d.center(los," ")+Style.RESET_ALL
print("| "+lofnum+" | "+lofstr+" | "+lofst+" |")
print(upb)
return("")
except(ValueError):
print(Style.BRIGHT+Fore.RED+"[!] Uh oh! No parameters in URL!"+Style.RESET_ALL)
again()
def complete(p,r,c,d):
print("[+] Bruteforce Completed.")
if c == 0:
print("[+] Given parameters are "+Style.BRIGHT+Fore.GREEN+"not vulnerable"+Style.RESET_ALL+" to XSS.")
elif c ==1:
print("[+] %s Parameter is "+Style.BRIGHT+Fore.RED+"vulnerable"+Style.RESET_ALL+" to XSS.")%c
else:
print("[+] %s Parameters are "+Style.BRIGHT+Fore.RED+"vulnerable"+Style.RESET_ALL+" to XSS.")%c
print("[+] Scan Result for %s:")%d
print bg(p,r)
again()
def GET():
try:
try:
grey = Style.DIM+Fore.WHITE
site = raw_input("[?] Enter URL:\n[?] > ") #Taking URL
if 'https://' in site:
pass
elif 'http://' in site:
pass
else:
site = "http://"+site
finalurl = urlparse.urlparse(site)
urldata = urlparse.parse_qsl(finalurl.query)
domain0 = '{uri.scheme}://{uri.netloc}/'.format(uri=finalurl)
domain = domain0.replace("https://","").replace("http://","").replace("www.","").replace("/","")
print (Style.DIM+Fore.WHITE+"[+] Checking if "+domain+" is available..."+Style.RESET_ALL)
connection = httplib.HTTPConnection(domain)
connection.connect()
print("[+] "+Fore.GREEN+domain+" is available! Good!"+Style.RESET_ALL)
url = site
paraname = []
paravalue = []
wordlist = raw_input("[?] Enter location of Wordlist (Press Enter to use default wordlist.txt)\n[?] > ")
if len(wordlist) == 0:
wordlist = 'wordlist.txt'
print(grey+"[+] Using Default wordlist..."+Style.RESET_ALL)
else:
pass
payloads = []
wordlistimport(wordlist,payloads)
lop = str(len(payloads))
grey = Style.DIM+Fore.WHITE
print(Style.DIM+Fore.WHITE+"[+] "+lop+" Payloads loaded..."+Style.RESET_ALL)
print("[+] Bruteforce start:")
o = urlparse.urlparse(site)
parameters = urlparse.parse_qs(o.query,keep_blank_values=True)
path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
for para in parameters: #Arranging parameters and values.
for i in parameters[para]:
paraname.append(para)
paravalue.append(i)
total = 0
c = 0
fpar = []
fresult = []
progress = 0
for pn, pv in zip(paraname,paravalue): #Scanning the parameter.
print(grey+"[+] Testing '"+pn+"' parameter..."+Style.RESET_ALL)
fpar.append(str(pn))
for x in payloads: #
validate = x.translate(None, whitespace)
if validate == "":
progress = progress + 1
else:
sys.stdout.write("\r[+] %i / %s payloads injected..."% (progress,len(payloads)))
sys.stdout.flush()
progress = progress + 1
enc = urllib.quote_plus(x)
data = path+"?"+pn+"="+pv+enc
page = urllib.urlopen(data)
sourcecode = page.read()
if x in sourcecode:
print(Style.BRIGHT+Fore.RED+"\n[!]"+" XSS Vulnerability Found! \n"+Fore.RED+Style.BRIGHT+"[!]"+" Parameter:\t%s\n"+Fore.RED+Style.BRIGHT+"[!]"+" Payload:\t%s"+Style.RESET_ALL)%(pn,x)
fresult.append(" Vulnerable ")
c = 1
total = total+1
progress = progress + 1
break
else:
c = 0
if c == 0:
print(Style.BRIGHT+Fore.GREEN+"\n[+]"+Style.RESET_ALL+Style.DIM+Fore.WHITE+" '%s' parameter not vulnerable."+Style.RESET_ALL)%pn
fresult.append("Not Vulnerable")
progress = progress + 1
pass
progress = 0
complete(fpar,fresult,total,domain)
except(httplib.HTTPResponse, socket.error) as Exit:
print(Style.BRIGHT+Fore.RED+"[!] Site "+domain+" is offline!"+Style.RESET_ALL)
again()
except(KeyboardInterrupt) as Exit:
print("\nExit...")
def POST():
try:
try:
try:
br = mechanize.Browser()
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11)Gecko/20071127 Firefox/2.0.0.11')]
br.set_handle_robots(False)
br.set_handle_refresh(False)
site = raw_input("[?] Enter URL:\n[?] > ") #Taking URL
if 'https://' in site:
pass
elif 'http://' in site:
pass
else:
site = "http://"+site
finalurl = urlparse.urlparse(site)
urldata = urlparse.parse_qsl(finalurl.query)
domain0 = '{uri.scheme}://{uri.netloc}/'.format(uri=finalurl)
domain = domain0.replace("https://","").replace("http://","").replace("www.","").replace("/","")
print (Style.DIM+Fore.WHITE+"[+] Checking if "+domain+" is available..."+Style.RESET_ALL)
connection = httplib.HTTPConnection(domain)
connection.connect()
print("[+] "+Fore.GREEN+domain+" is available! Good!"+Style.RESET_ALL)
path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
url = site
param = str(raw_input("[?] Enter post data: > "))
wordlist = raw_input("[?] Enter location of Wordlist (Press Enter to use default wordlist.txt)\n[?] > ")
if len(wordlist) == 0:
wordlist = 'wordlist.txt'
print("[+] Using Default wordlist...")
else:
pass
payloads = []
wordlistimport(wordlist,payloads)
lop = str(len(payloads))
grey = Style.DIM+Fore.WHITE
print(Style.DIM+Fore.WHITE+"[+] "+lop+" Payloads loaded..."+Style.RESET_ALL)
print("[+] Bruteforce start:")
params = "http://www.site.com/?"+param
finalurl = urlparse.urlparse(params)
urldata = urlparse.parse_qsl(finalurl.query)
o = urlparse.urlparse(params)
parameters = urlparse.parse_qs(o.query,keep_blank_values=True)
paraname = []
paravalue = []
for para in parameters: #Arranging parameters and values.
for i in parameters[para]:
paraname.append(para)
paravalue.append(i)
fpar = []
fresult = []
total = 0
progress = 0
pname1 = [] #parameter name
payload1 = []
for pn, pv in zip(paraname,paravalue): #Scanning the parameter.
print(grey+"[+] Testing '"+pn+"' parameter..."+Style.RESET_ALL)
fpar.append(str(pn))
for i in payloads:
validate = i.translate(None, whitespace)
if validate == "":
progress = progress + 1
else:
progress = progress + 1
sys.stdout.write("\r[+] %i / %s payloads injected..."% (progress,len(payloads)))
sys.stdout.flush()
pname1.append(pn)
payload1.append(str(i))
d4rk = 0
for m in range(len(paraname)):
d = paraname[d4rk]
d1 = paravalue[d4rk]
tst= "".join(pname1)
tst1 = "".join(d)
if pn in d:
d4rk = d4rk + 1
else:
d4rk = d4rk +1
pname1.append(str(d))
payload1.append(str(d1))
data = urllib.urlencode(dict(zip(pname1,payload1)))
r = br.open(path, data)
sourcecode = r.read()
pname1 = []
payload1 = []
if i in sourcecode:
print(Style.BRIGHT+Fore.RED+"\n[!]"+" XSS Vulnerability Found! \n"+Fore.RED+Style.BRIGHT+"[!]"+" Parameter:\t%s\n"+Fore.RED+Style.BRIGHT+"[!]"+" Payload:\t%s"+Style.RESET_ALL)%(pn,i)
fresult.append(" Vulnerable ")
c = 1
total = total+1
progress = progress + 1
break
else:
c = 0
if c == 0:
print(Style.BRIGHT+Fore.GREEN+"\n[+]"+Style.RESET_ALL+Style.DIM+Fore.WHITE+" '%s' parameter not vulnerable."+Style.RESET_ALL)%pn
fresult.append("Not Vulnerable")
progress = progress + 1
pass
progress = 0
complete(fpar,fresult,total,domain)
except(httplib.HTTPResponse, socket.error) as Exit:
print(Style.BRIGHT+Fore.RED+"[!] Site "+domain+" is offline!"+Style.RESET_ALL)
again()
except(KeyboardInterrupt) as Exit:
print("\nExit...")
except (mechanize.HTTPError,mechanize.URLError) as e:
print(Style.BRIGHT+Fore.RED+"\n[!] HTTP ERROR! %s %s"+Style.RESET_ALL)%(e.code,e.reason)
try:
methodselect = raw_input("[?] Select method: [G]ET or [P]OST (G/P): ").lower()
if methodselect == 'g':
GET()
elif methodselect == 'p':
POST()
else:
print("[!] Incorrect method selected.")
again()
except(KeyboardInterrupt) as Exit:
print("\nExit...")
brutexss()
================================================
FILE: BruteXSS/colorama/__init__.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from .initialise import init, deinit, reinit, colorama_text
from .ansi import Fore, Back, Style, Cursor
from .ansitowin32 import AnsiToWin32
__version__ = '0.3.7'
================================================
FILE: BruteXSS/colorama/ansi.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
'''
This module generates ANSI character codes for printing colors to terminals.
See: http://en.wikipedia.org/wiki/ANSI_escape_code
'''
# Control Sequence Introducer: ESC followed by '['.
CSI = '\033['
# Operating System Command introducer: ESC followed by ']'.
OSC = '\033]'
# BEL control character; set_title() uses it to terminate the OSC sequence.
BEL = '\007'
def code_to_chars(code):
    """Return the escape sequence ``CSI <code> m`` for the numeric *code*."""
    return ''.join((CSI, str(code), 'm'))
def set_title(title):
    """Return the sequence ``OSC 2;<title> BEL`` for the given *title*."""
    prefix = OSC + '2;'
    return prefix + title + BEL
def clear_screen(mode=2):
    """Return the sequence ``CSI <mode> J``."""
    return ''.join((CSI, str(mode), 'J'))
def clear_line(mode=2):
    """Return the sequence ``CSI <mode> K``."""
    return ''.join((CSI, str(mode), 'K'))
class AnsiCodes(object):
    """Base class that turns numeric class attributes into escape strings."""

    def __init__(self):
        # Subclasses declare their codes as plain numbers on the class.  On
        # instantiation every public attribute is shadowed by an instance
        # attribute holding the same code wrapped by code_to_chars().
        public_names = [attr for attr in dir(self) if not attr.startswith('_')]
        for attr in public_names:
            setattr(self, attr, code_to_chars(getattr(self, attr)))
class AnsiCursor(object):
    """Builds cursor movement sequences (``CSI`` + arguments + a letter)."""

    def UP(self, n=1):
        """Return ``CSI <n> A``."""
        return ''.join((CSI, str(n), 'A'))

    def DOWN(self, n=1):
        """Return ``CSI <n> B``."""
        return ''.join((CSI, str(n), 'B'))

    def FORWARD(self, n=1):
        """Return ``CSI <n> C``."""
        return ''.join((CSI, str(n), 'C'))

    def BACK(self, n=1):
        """Return ``CSI <n> D``."""
        return ''.join((CSI, str(n), 'D'))

    def POS(self, x=1, y=1):
        """Return ``CSI <y>;<x> H`` (note that *y* comes first)."""
        return ''.join((CSI, str(y), ';', str(x), 'H'))
# Foreground colour codes.  The numbers are the raw class values; the
# inherited AnsiCodes.__init__ shadows each one on the instance with the
# escape string produced by code_to_chars().
class AnsiFore(AnsiCodes):
BLACK = 30
RED = 31
GREEN = 32
YELLOW = 33
BLUE = 34
MAGENTA = 35
CYAN = 36
WHITE = 37
RESET = 39
# These are fairly well supported, but not part of the standard.
LIGHTBLACK_EX = 90
LIGHTRED_EX = 91
LIGHTGREEN_EX = 92
LIGHTYELLOW_EX = 93
LIGHTBLUE_EX = 94
LIGHTMAGENTA_EX = 95
LIGHTCYAN_EX = 96
LIGHTWHITE_EX = 97
# Background colour codes.  Each value is the matching AnsiFore code plus 10;
# the inherited AnsiCodes.__init__ converts them to escape strings on the
# instance.
class AnsiBack(AnsiCodes):
BLACK = 40
RED = 41
GREEN = 42
YELLOW = 43
BLUE = 44
MAGENTA = 45
CYAN = 46
WHITE = 47
RESET = 49
# These are fairly well supported, but not part of the standard.
LIGHTBLACK_EX = 100
LIGHTRED_EX = 101
LIGHTGREEN_EX = 102
LIGHTYELLOW_EX = 103
LIGHTBLUE_EX = 104
LIGHTMAGENTA_EX = 105
LIGHTCYAN_EX = 106
LIGHTWHITE_EX = 107
# Text style codes; converted to escape strings on the instance by the
# inherited AnsiCodes.__init__.
class AnsiStyle(AnsiCodes):
BRIGHT = 1
DIM = 2
NORMAL = 22
RESET_ALL = 0
# Module-level singletons.  Instantiating the AnsiCodes subclasses runs
# AnsiCodes.__init__, so e.g. Fore.RED is an escape string while
# AnsiFore.RED stays the number 31.  Cursor exposes methods instead.
Fore = AnsiFore()
Back = AnsiBack()
Style = AnsiStyle()
Cursor = AnsiCursor()
================================================
FILE: BruteXSS/colorama/ansitowin32.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import re
import sys
import os
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style
from .winterm import WinTerm, WinColor, WinStyle
from .win32 import windll, winapi_test
# Shared WinTerm instance; stays None when win32.windll is None, i.e. when
# the ctypes Windows loader could not be set up.
winterm = None
if windll is not None:
winterm = WinTerm()
def is_stream_closed(stream):
    """Return a true value when *stream* is closed.

    Objects without a ``closed`` attribute are treated as closed.
    """
    if not hasattr(stream, 'closed'):
        return True
    return stream.closed
def is_a_tty(stream):
    """Return the result of ``stream.isatty()``, or False if it has none."""
    if not hasattr(stream, 'isatty'):
        return False
    return stream.isatty()
class StreamWrapper(object):
    '''
    Wraps a stream (such as stdout), acting as a transparent proxy for all
    attribute access apart from method 'write()', which is delegated to our
    Converter instance.
    '''
    def __init__(self, wrapped, converter):
        # double-underscore everything to prevent clashes with names of
        # attributes on the wrapped stream object.
        self.__wrapped = wrapped
        self.__convertor = converter

    def __getattr__(self, name):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self.__wrapped, name)

    def __enter__(self, *args, **kwargs):
        # Special method lookup happens on the type and bypasses
        # __getattr__, so the context manager protocol has to be forwarded
        # explicitly for ``with wrapper:`` to behave like the wrapped stream.
        return self.__wrapped.__enter__(*args, **kwargs)

    def __exit__(self, *args, **kwargs):
        return self.__wrapped.__exit__(*args, **kwargs)

    def write(self, text):
        '''Send *text* to the converter instead of the wrapped stream.'''
        self.__convertor.write(text)
class AnsiToWin32(object):
    '''
    Implements a 'write()' method which, on Windows, will strip ANSI character
    sequences from the text, and if outputting to a tty, will convert them into
    win32 function calls.
    '''
    # Backslashes meant for the regex engine are doubled so the literals do
    # not rely on invalid string escapes such as '\[' or '\d'.
    ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?')   # Control Sequence Introducer
    ANSI_OSC_RE = re.compile('\001?\033\\]((?:.|;)*?)(\x07)\002?')        # Operating System Command

    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
        # The wrapped stream (normally sys.stdout or sys.stderr)
        self.wrapped = wrapped

        # should we reset colors to defaults after every .write()
        self.autoreset = autoreset

        # create the proxy wrapping our output stream
        self.stream = StreamWrapper(wrapped, self)

        on_windows = os.name == 'nt'
        # We test if the WinAPI works, because even if we are on Windows
        # we may be using a terminal that doesn't support the WinAPI
        # (e.g. Cygwin Terminal). In this case it's up to the terminal
        # to support the ANSI codes.
        conversion_supported = on_windows and winapi_test()

        # should we strip ANSI sequences from our output?
        if strip is None:
            strip = conversion_supported or (not is_stream_closed(wrapped) and not is_a_tty(wrapped))
        self.strip = strip

        # should we convert ANSI sequences into win32 calls?
        if convert is None:
            convert = conversion_supported and not is_stream_closed(wrapped) and is_a_tty(wrapped)
        self.convert = convert

        # dict of ansi codes to win32 functions and parameters
        self.win32_calls = self.get_win32_calls()

        # are we wrapping stderr?
        self.on_stderr = self.wrapped is sys.stderr

    def should_wrap(self):
        '''
        True if this class is actually needed. If false, then the output
        stream will not be affected, nor will win32 calls be issued, so
        wrapping stdout is not actually required. This will generally be
        False on non-Windows platforms, unless optional functionality like
        autoreset has been requested using kwargs to init()
        '''
        return self.convert or self.strip or self.autoreset

    def get_win32_calls(self):
        '''Map numeric SGR codes to ``(winterm method, *args)`` tuples.'''
        if self.convert and winterm:
            return {
                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
                AnsiFore.RED: (winterm.fore, WinColor.RED),
                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
                AnsiFore.RESET: (winterm.fore, ),
                AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
                AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
                AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
                AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
                AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
                AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
                AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
                AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
                AnsiBack.RED: (winterm.back, WinColor.RED),
                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
                AnsiBack.RESET: (winterm.back, ),
                AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
                AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
                AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
                AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
                AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
                AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
                AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
                AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
            }
        return dict()

    def write(self, text):
        '''Write *text*, stripping/converting ANSI sequences when enabled.'''
        if self.strip or self.convert:
            self.write_and_convert(text)
        else:
            self.wrapped.write(text)
            self.wrapped.flush()
        if self.autoreset:
            self.reset_all()

    def reset_all(self):
        '''Reset colours, via win32 when converting, else via RESET_ALL.'''
        if self.convert:
            self.call_win32('m', (0,))
        elif not self.strip and not is_stream_closed(self.wrapped):
            self.wrapped.write(Style.RESET_ALL)

    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        text = self.convert_osc(text)
        for match in self.ANSI_CSI_RE.finditer(text):
            start, end = match.span()
            self.write_plain_text(text, cursor, start)
            self.convert_ansi(*match.groups())
            cursor = end
        self.write_plain_text(text, cursor, len(text))

    def write_plain_text(self, text, start, end):
        '''Write ``text[start:end]`` to the wrapped stream if non-empty.'''
        if start < end:
            self.wrapped.write(text[start:end])
            self.wrapped.flush()

    def convert_ansi(self, paramstring, command):
        '''Turn one CSI sequence into a win32 call (only when converting).'''
        if self.convert:
            params = self.extract_params(command, paramstring)
            self.call_win32(command, params)

    def extract_params(self, command, paramstring):
        '''Parse the ';' separated parameters, applying per-command defaults.'''
        if command in 'Hf':
            params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
            while len(params) < 2:
                # defaults:
                params = params + (1,)
        else:
            params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
            if len(params) == 0:
                # defaults:
                if command in 'JKm':
                    params = (0,)
                elif command in 'ABCD':
                    params = (1,)
        return params

    def call_win32(self, command, params):
        '''Dispatch one parsed CSI command to the matching winterm method.'''
        if command == 'm':
            for param in params:
                if param in self.win32_calls:
                    func_args = self.win32_calls[param]
                    func = func_args[0]
                    args = func_args[1:]
                    kwargs = dict(on_stderr=self.on_stderr)
                    func(*args, **kwargs)
        elif command in 'J':
            winterm.erase_screen(params[0], on_stderr=self.on_stderr)
        elif command in 'K':
            winterm.erase_line(params[0], on_stderr=self.on_stderr)
        elif command in 'Hf':     # cursor position - absolute
            winterm.set_cursor_position(params, on_stderr=self.on_stderr)
        elif command in 'ABCD':   # cursor position - relative
            n = params[0]
            # A - up, B - down, C - forward, D - back
            x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
            winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)

    def convert_osc(self, text):
        '''
        Remove every OSC sequence from *text* and return the remainder.
        Title sequences ("0;<title>" / "2;<title>") are applied through
        winterm when one is available.
        '''
        def handle(match):
            paramstring, command = match.groups()
            if command == '\x07':   # \x07 = BEL
                params = paramstring.split(";")
                # 0 - change title and icon (we will only change title)
                # 1 - change icon (we don't support this)
                # 2 - change title
                # A sequence without ';' carries no title; winterm is None
                # when the win32 API is unavailable.
                if len(params) >= 2 and params[0] in ('0', '2') and winterm is not None:
                    winterm.set_title(params[1])
            return ''
        # sub() removes all matches in one pass, so the offsets of later
        # sequences are not invalidated by earlier removals.
        return self.ANSI_OSC_RE.sub(handle, text)
================================================
FILE: BruteXSS/colorama/initialise.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import atexit
import contextlib
import sys
from .ansitowin32 import AnsiToWin32
# Module state shared by init(), deinit() and reinit().
# sys.stdout / sys.stderr as they were when init() last ran:
orig_stdout = None
orig_stderr = None
# The streams init() installed in their place (None if the stream was None):
wrapped_stdout = None
wrapped_stderr = None
# True once reset_all has been registered with atexit:
atexit_done = False
def reset_all():
    """Reset the colours of the original stdout (registered with atexit)."""
    # Issue #74: objects might become None at exit
    if AnsiToWin32 is None:
        return
    AnsiToWin32(orig_stdout).reset_all()
def init(autoreset=False, convert=None, strip=None, wrap=True):
    """Replace sys.stdout / sys.stderr with wrapped streams.

    The previous streams are remembered in ``orig_stdout`` / ``orig_stderr``
    and ``reset_all`` is registered with ``atexit`` on the first call.

    Raises ValueError when ``wrap`` is false but another option is true.
    """
    global wrapped_stdout, wrapped_stderr
    global orig_stdout, orig_stderr
    global atexit_done

    if not wrap and (autoreset or convert or strip):
        raise ValueError('wrap=False conflicts with any other arg=True')

    orig_stdout, orig_stderr = sys.stdout, sys.stderr

    if sys.stdout is None:
        wrapped_stdout = None
    else:
        wrapped_stdout = wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
        sys.stdout = wrapped_stdout

    if sys.stderr is None:
        wrapped_stderr = None
    else:
        wrapped_stderr = wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
        sys.stderr = wrapped_stderr

    if not atexit_done:
        atexit.register(reset_all)
        atexit_done = True
def deinit():
    """Put the streams saved by init() back on sys.stdout / sys.stderr."""
    saved_out, saved_err = orig_stdout, orig_stderr
    if saved_out is not None:
        sys.stdout = saved_out
    if saved_err is not None:
        sys.stderr = saved_err
# Context manager: calls init() with the given arguments on entry and
# deinit() on exit, even when the body raises.
@contextlib.contextmanager
def colorama_text(*args, **kwargs):
init(*args, **kwargs)
try:
yield
finally:
deinit()
def reinit():
    """Re-install the wrapped streams created by the last init() call."""
    new_out, new_err = wrapped_stdout, wrapped_stderr
    if new_out is not None:
        sys.stdout = new_out
    if new_err is not None:
        sys.stderr = new_err
def wrap_stream(stream, convert, strip, autoreset, wrap):
    """Return the proxy for *stream*, or *stream* itself if none is needed.

    No wrapper is built when ``wrap`` is false; otherwise the proxy is only
    used when ``AnsiToWin32.should_wrap()`` says so.
    """
    if not wrap:
        return stream
    wrapper = AnsiToWin32(stream, convert=convert, strip=strip, autoreset=autoreset)
    return wrapper.stream if wrapper.should_wrap() else stream
================================================
FILE: BruteXSS/colorama/win32.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
# from winbase.h
# Standard handle ids; passed to GetStdHandle when the handle table is built.
STDOUT = -11
STDERR = -12
# Try to set up the ctypes Windows loader.  If ctypes or ctypes.WinDLL is
# missing, windll is None and the two names below become no-ops returning
# None.
try:
import ctypes
from ctypes import LibraryLoader
windll = LibraryLoader(ctypes.WinDLL)
from ctypes import wintypes
except (AttributeError, ImportError):
windll = None
SetConsoleTextAttribute = lambda *_: None
winapi_test = lambda *_: None
else:
# Everything from here on is only defined when the loader is available.
from ctypes import byref, Structure, c_char, POINTER
COORD = wintypes._COORD
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
    """struct in wincon.h."""
    _fields_ = [
        ("dwSize", COORD),
        ("dwCursorPosition", COORD),
        ("wAttributes", wintypes.WORD),
        ("srWindow", wintypes.SMALL_RECT),
        ("dwMaximumWindowSize", COORD),
    ]

    def __str__(self):
        # Eleven comma separated integers, in field order, with Y before X
        # for every coordinate pair.
        size = self.dwSize
        cursor = self.dwCursorPosition
        window = self.srWindow
        largest = self.dwMaximumWindowSize
        values = (
            size.Y, size.X,
            cursor.Y, cursor.X,
            self.wAttributes,
            window.Top, window.Left, window.Bottom, window.Right,
            largest.Y, largest.X,
        )
        return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % values
# ctypes prototypes for the kernel32 console functions used below.  Each
# binding declares its argument types and return type explicitly.
_GetStdHandle = windll.kernel32.GetStdHandle
_GetStdHandle.argtypes = [
wintypes.DWORD,
]
_GetStdHandle.restype = wintypes.HANDLE
_GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
_GetConsoleScreenBufferInfo.argtypes = [
wintypes.HANDLE,
POINTER(CONSOLE_SCREEN_BUFFER_INFO),
]
_GetConsoleScreenBufferInfo.restype = wintypes.BOOL
_SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
_SetConsoleTextAttribute.argtypes = [
wintypes.HANDLE,
wintypes.WORD,
]
_SetConsoleTextAttribute.restype = wintypes.BOOL
_SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
_SetConsoleCursorPosition.argtypes = [
wintypes.HANDLE,
COORD,
]
_SetConsoleCursorPosition.restype = wintypes.BOOL
_FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
_FillConsoleOutputCharacterA.argtypes = [
wintypes.HANDLE,
c_char,
wintypes.DWORD,
COORD,
POINTER(wintypes.DWORD),
]
_FillConsoleOutputCharacterA.restype = wintypes.BOOL
_FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
_FillConsoleOutputAttribute.argtypes = [
wintypes.HANDLE,
wintypes.WORD,
wintypes.DWORD,
COORD,
POINTER(wintypes.DWORD),
]
_FillConsoleOutputAttribute.restype = wintypes.BOOL
# NOTE(review): despite the "W" suffix of the Python name, this is bound to
# the ANSI entry point SetConsoleTitleA and takes an LPCSTR.
_SetConsoleTitleW = windll.kernel32.SetConsoleTitleA
_SetConsoleTitleW.argtypes = [
wintypes.LPCSTR
]
_SetConsoleTitleW.restype = wintypes.BOOL
# Console handles looked up once, keyed by the STDOUT / STDERR ids.
handles = {
STDOUT: _GetStdHandle(STDOUT),
STDERR: _GetStdHandle(STDERR),
}
def winapi_test():
    """Return True if GetConsoleScreenBufferInfo succeeds for stdout."""
    stdout_handle = handles[STDOUT]
    info = CONSOLE_SCREEN_BUFFER_INFO()
    return bool(_GetConsoleScreenBufferInfo(stdout_handle, byref(info)))
def GetConsoleScreenBufferInfo(stream_id=STDOUT):
    """Return a CONSOLE_SCREEN_BUFFER_INFO filled in for *stream_id*.

    The success flag of the API call is not checked; the structure is
    returned either way.
    """
    info = CONSOLE_SCREEN_BUFFER_INFO()
    _GetConsoleScreenBufferInfo(handles[stream_id], byref(info))
    return info
def SetConsoleTextAttribute(stream_id, attrs):
    """Apply the attribute word *attrs* to the console of *stream_id*."""
    return _SetConsoleTextAttribute(handles[stream_id], attrs)
def SetConsoleCursorPosition(stream_id, position, adjust=True):
    """Move the cursor to the 1-based ANSI *position* given as (y, x).

    Returns None without moving when either coordinate is not positive.
    With ``adjust`` true the position is offset by the window's scroll
    position, read from the STDOUT buffer info.
    """
    position = COORD(*position)
    # If the position is out of range, do nothing.
    if not (position.Y > 0 and position.X > 0):
        return
    # Adjust for Windows' SetConsoleCursorPosition:
    #    1. being 0-based, while ANSI is 1-based.
    #    2. expecting (x,y), while ANSI uses (y,x).
    target = COORD(position.Y - 1, position.X - 1)
    if adjust:
        # Adjust for viewport's scroll position
        window = GetConsoleScreenBufferInfo(STDOUT).srWindow
        target.Y += window.Top
        target.X += window.Left
    # Resume normal processing
    return _SetConsoleCursorPosition(handles[stream_id], target)
def FillConsoleOutputCharacter(stream_id, char, length, start):
    """Write *char* *length* times from *start*; return the count written."""
    handle = handles[stream_id]
    fill = c_char(char.encode())
    count = wintypes.DWORD(length)
    written = wintypes.DWORD(0)
    # Note that this is hard-coded for ANSI (vs wide) bytes.
    _FillConsoleOutputCharacterA(handle, fill, count, start, byref(written))
    return written.value
def FillConsoleOutputAttribute(stream_id, attr, length, start):
    ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
    handle = handles[stream_id]
    attr_word = wintypes.WORD(attr)
    count = wintypes.DWORD(length)
    written = wintypes.DWORD(0)
    # Note that this is hard-coded for ANSI (vs wide) bytes.
    result = _FillConsoleOutputAttribute(handle, attr_word, count, start, byref(written))
    return result
def SetConsoleTitle(title):
    """Pass *title* to the bound console title function; return its result."""
    outcome = _SetConsoleTitleW(title)
    return outcome
================================================
FILE: BruteXSS/colorama/winterm.py
================================================
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from . import win32
# from wincon.h
# Colour numbers 0-7.  WinTerm keeps the foreground colour in bits 0-2 and
# the background colour in bits 4-6 of the attribute word.
class WinColor(object):
BLACK = 0
BLUE = 1
GREEN = 2
CYAN = 3
RED = 4
MAGENTA = 5
YELLOW = 6
GREY = 7
# from wincon.h
# Intensity bits that WinTerm ORs into the attribute word.
class WinStyle(object):
NORMAL = 0x00 # dim text, dim background
BRIGHT = 0x08 # bright text, dim background
BRIGHT_BACKGROUND = 0x80 # dim text, bright background
class WinTerm(object):
    """Tracks the console text attributes and applies them through win32."""

    def __init__(self):
        self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
        self.set_attrs(self._default)
        self._default_fore = self._fore
        self._default_back = self._back
        self._default_style = self._style
        # In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style.
        # So that LIGHT_EX colors and BRIGHT style do not clobber each other,
        # we track them separately, since LIGHT_EX is overwritten by Fore/Back
        # and BRIGHT is overwritten by Style codes.
        self._light = 0

    def get_attrs(self):
        """Combine fore, back, style and light bits into one attribute word."""
        return self._fore + self._back * 16 + (self._style | self._light)

    def set_attrs(self, value):
        """Split the attribute word *value* into fore, back and style."""
        self._fore = value & 7
        self._back = (value >> 4) & 7
        self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)

    def reset_all(self, on_stderr=None):
        """Restore the attributes that were active at construction time."""
        self.set_attrs(self._default)
        self.set_console(attrs=self._default)
        # Forget any LIGHT_EX emulation bit as well; otherwise the next
        # fore()/back()/style() call would re-apply it through get_attrs().
        self._light = 0

    def fore(self, fore=None, light=False, on_stderr=False):
        """Set the foreground colour (default colour when *fore* is None)."""
        if fore is None:
            fore = self._default_fore
        self._fore = fore
        # Emulate LIGHT_EX with BRIGHT Style
        if light:
            self._light |= WinStyle.BRIGHT
        else:
            self._light &= ~WinStyle.BRIGHT
        self.set_console(on_stderr=on_stderr)

    def back(self, back=None, light=False, on_stderr=False):
        """Set the background colour (default colour when *back* is None)."""
        if back is None:
            back = self._default_back
        self._back = back
        # Emulate LIGHT_EX with BRIGHT_BACKGROUND Style
        if light:
            self._light |= WinStyle.BRIGHT_BACKGROUND
        else:
            self._light &= ~WinStyle.BRIGHT_BACKGROUND
        self.set_console(on_stderr=on_stderr)

    def style(self, style=None, on_stderr=False):
        """Set the style bits (default style when *style* is None)."""
        if style is None:
            style = self._default_style
        self._style = style
        self.set_console(on_stderr=on_stderr)

    def set_console(self, attrs=None, on_stderr=False):
        """Apply *attrs* (or the tracked attributes) to the chosen handle."""
        if attrs is None:
            attrs = self.get_attrs()
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleTextAttribute(handle, attrs)

    def get_position(self, handle):
        """Return the cursor position of *handle*, converted to 1-based."""
        position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
        # Because Windows coordinates are 0-based,
        # and win32.SetConsoleCursorPosition expects 1-based.
        position.X += 1
        position.Y += 1
        return position

    def set_cursor_position(self, position=None, on_stderr=False):
        """Move the cursor to *position*; does nothing when it is None."""
        if position is None:
            # I'm not currently tracking the position, so there is no default.
            # position = self.get_position()
            return
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleCursorPosition(handle, position)

    def cursor_adjust(self, x, y, on_stderr=False):
        """Move the cursor by *x* columns and *y* rows from where it is."""
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        position = self.get_position(handle)
        adjusted_position = (position.Y + y, position.X + x)
        win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)

    def erase_screen(self, mode=0, on_stderr=False):
        """Blank part or all of the screen buffer; unknown modes are ignored."""
        # 0 should clear from the cursor to the end of the screen.
        # 1 should clear from the cursor to the beginning of the screen.
        # 2 should clear the entire screen, and move cursor to (1,1)
        if mode not in (0, 1, 2):
            # invalid mode: nothing to erase
            return
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        # get the number of character cells in the current buffer
        cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
        # get number of character cells before current cursor position
        cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = cells_in_screen - cells_before_cursor
        elif mode == 1:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_before_cursor
        else:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_in_screen
        # fill the selected cells with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
        if mode == 2:
            # put the cursor where needed
            win32.SetConsoleCursorPosition(handle, (1, 1))

    def erase_line(self, mode=0, on_stderr=False):
        """Blank part or all of the current line; unknown modes are ignored."""
        # 0 should clear from the cursor to the end of the line.
        # 1 should clear from the cursor to the beginning of the line.
        # 2 should clear the entire line.
        if mode not in (0, 1, 2):
            # invalid mode: nothing to erase
            return
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
        elif mode == 1:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwCursorPosition.X
        else:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwSize.X
        # fill the selected part of the line with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)

    def set_title(self, title):
        """Set the console window title."""
        win32.SetConsoleTitle(title)
================================================
FILE: BruteXSS/mechanize/__init__.py
================================================
__all__ = [
'AbstractBasicAuthHandler',
'AbstractDigestAuthHandler',
'BaseHandler',
'Browser',
'BrowserStateError',
'CacheFTPHandler',
'ContentTooShortError',
'Cookie',
'CookieJar',
'CookiePolicy',
'DefaultCookiePolicy',
'DefaultFactory',
'FTPHandler',
'Factory',
'FileCookieJar',
'FileHandler',
'FormNotFoundError',
'FormsFactory',
'HTTPBasicAuthHandler',
'HTTPCookieProcessor',
'HTTPDefaultErrorHandler',
'HTTPDigestAuthHandler',
'HTTPEquivProcessor',
'HTTPError',
'HTTPErrorProcessor',
'HTTPHandler',
'HTTPPasswordMgr',
'HTTPPasswordMgrWithDefaultRealm',
'HTTPProxyPasswordMgr',
'HTTPRedirectDebugProcessor',
'HTTPRedirectHandler',
'HTTPRefererProcessor',
'HTTPRefreshProcessor',
'HTTPResponseDebugProcessor',
'HTTPRobotRulesProcessor',
'HTTPSClientCertMgr',
'HeadParser',
'History',
'LWPCookieJar',
'Link',
'LinkNotFoundError',
'LinksFactory',
'LoadError',
'MSIECookieJar',
'MozillaCookieJar',
'OpenerDirector',
'OpenerFactory',
'ParseError',
'ProxyBasicAuthHandler',
'ProxyDigestAuthHandler',
'ProxyHandler',
'Request',
'RobotExclusionError',
'RobustFactory',
'RobustFormsFactory',
'RobustLinksFactory',
'RobustTitleFactory',
'SeekableResponseOpener',
'TitleFactory',
'URLError',
'USE_BARE_EXCEPT',
'UnknownHandler',
'UserAgent',
'UserAgentBase',
'XHTMLCompatibleHeadParser',
'__version__',
'build_opener',
'install_opener',
'lwp_cookie_str',
'make_response',
'request_host',
'response_seek_wrapper', # XXX deprecate in public interface?
'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
'str2time',
'urlopen',
'urlretrieve',
'urljoin',
# ClientForm API
'AmbiguityError',
'ControlNotFoundError',
'FormParser',
'ItemCountError',
'ItemNotFoundError',
'LocateError',
'Missing',
'ParseFile',
'ParseFileEx',
'ParseResponse',
'ParseResponseEx',
'ParseString',
'XHTMLCompatibleFormParser',
# deprecated
'CheckboxControl',
'Control',
'FileControl',
'HTMLForm',
'HiddenControl',
'IgnoreControl',
'ImageControl',
'IsindexControl',
'Item',
'Label',
'ListControl',
'PasswordControl',
'RadioControl',
'ScalarControl',
'SelectControl',
'SubmitButtonControl',
'SubmitControl',
'TextControl',
'TextareaControl',
]
import logging
import sys
from _version import __version__
# high-level stateful browser-style interface
from _mechanize import \
Browser, History, \
BrowserStateError, LinkNotFoundError, FormNotFoundError
# configurable URL-opener interface
from _useragent import UserAgentBase, UserAgent
from _html import \
Link, \
Factory, DefaultFactory, RobustFactory, \
FormsFactory, LinksFactory, TitleFactory, \
RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
# urllib2 work-alike interface. This is a superset of the urllib2 interface.
from _urllib2 import *
import _urllib2
if hasattr(_urllib2, "HTTPSHandler"):
__all__.append("HTTPSHandler")
del _urllib2
# misc
from _http import HeadParser
from _http import XHTMLCompatibleHeadParser
from _opener import ContentTooShortError, OpenerFactory, urlretrieve
from _response import \
response_seek_wrapper, seek_wrapped_response, make_response
from _rfc3986 import urljoin
from _util import http2time as str2time
# cookies
from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
effective_request_host
from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
# 2.4 raises SyntaxError due to generator / try/finally use
if sys.version_info[:2] > (2,4):
try:
import sqlite3
except ImportError:
pass
else:
from _firefox3cookiejar import Firefox3CookieJar
from _mozillacookiejar import MozillaCookieJar
from _msiecookiejar import MSIECookieJar
# forms
from _form import (
AmbiguityError,
ControlNotFoundError,
FormParser,
ItemCountError,
ItemNotFoundError,
LocateError,
Missing,
ParseError,
ParseFile,
ParseFileEx,
ParseResponse,
ParseResponseEx,
ParseString,
XHTMLCompatibleFormParser,
# deprecated
CheckboxControl,
Control,
FileControl,
HTMLForm,
HiddenControl,
IgnoreControl,
ImageControl,
IsindexControl,
Item,
Label,
ListControl,
PasswordControl,
RadioControl,
ScalarControl,
SelectControl,
SubmitButtonControl,
SubmitControl,
TextControl,
TextareaControl,
)
# If you hate the idea of turning bugs into warnings, do:
# import mechanize; mechanize.USE_BARE_EXCEPT = False
USE_BARE_EXCEPT = True

# Silence the "mechanize" logger by default, but only when nobody has
# configured a level for it yet.  Levels are plain integers, so they are
# compared by value (==) rather than by identity.
logger = logging.getLogger("mechanize")
if logger.level == logging.NOTSET:
    logger.setLevel(logging.CRITICAL)
del logger
================================================
FILE: BruteXSS/mechanize/_auth.py
================================================
"""HTTP Authentication and Proxy support.
Copyright 2006 John J. Lee
This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
from _urllib2_fork import HTTPPasswordMgr
# TODO: stop deriving from HTTPPasswordMgr
class HTTPProxyPasswordMgr(HTTPPasswordMgr):
    """Password manager that supports a default realm and a default
    host/port.

    A realm of None acts as the default realm and a URI of None acts as
    the default URI: an entry stored under a None URI matches any
    authority (see is_suburi()).
    """

    def add_password(self, realm, uri, user, passwd):
        """Store (user, passwd) for `realm` under one or more URIs.

        `uri` may be None, a single string, or a sequence of those.  Each
        URI is recorded twice, reduced once with and once without the
        default port.
        """
        # uri could be a single URI or a sequence
        if uri is None or isinstance(uri, basestring):
            uris = [uri]
        else:
            uris = uri
        passwd_by_domain = self.passwd.setdefault(realm, {})
        for one_uri in uris:
            for default_port in True, False:
                reduced_uri = self.reduce_uri(one_uri, default_port)
                passwd_by_domain[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the stored credentials matching `realm` and `authuri`.

        Returns the first matching stored tuple, or (None, None) when
        nothing matches.
        """
        attempts = [(realm, authuri), (None, authuri)]
        # bleh, want default realm to take precedence over default
        # URI/authority, hence this outer loop
        for default_uri in False, True:
            for attempt_realm, attempt_uri in attempts:
                authinfo_by_domain = self.passwd.get(attempt_realm, {})
                for default_port in True, False:
                    reduced_authuri = self.reduce_uri(attempt_uri,
                                                      default_port)
                    for uri, authinfo in authinfo_by_domain.items():
                        # Entries stored under the default (None) URI are
                        # only considered on the second outer pass.
                        if uri is None and not default_uri:
                            continue
                        if self.is_suburi(uri, reduced_authuri):
                            return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Reduce `uri` via the base class, passing None through
        unchanged so it can serve as the default-URI key."""
        if uri is None:
            return None
        return HTTPPasswordMgr.reduce_uri(self, uri, default_port)

    def is_suburi(self, base, test):
        """Like the base class check, but a `base` of None matches any
        path on the host/port of `test`."""
        if base is None:
            # default to the proxy's host/port
            hostport, path = test
            base = (hostport, "/")
        return HTTPPasswordMgr.is_suburi(self, base, test)
class HTTPSClientCertMgr(HTTPPasswordMgr):
    """Maps URIs to (key_file, cert_file) pairs.

    Implementation inheritance only: this reuses the password manager's
    storage and lookup under the None realm, and is not a proper
    subclass.
    """

    def add_key_cert(self, uri, key_file, cert_file):
        """Register the key and certificate files to use for `uri`."""
        realm = None
        self.add_password(realm, uri, key_file, cert_file)

    def find_key_cert(self, authuri):
        """Return whatever the base-class lookup yields for `authuri`
        under the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        return lookup(self, None, authuri)
================================================
FILE: BruteXSS/mechanize/_beautifulsoup.py
================================================
"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
v2.1.1
http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance
into a tree representation. It provides methods and Pythonic idioms
that make it easy to search and modify the tree.
A well-formed XML/HTML document will yield a well-formed data
structure. An ill-formed XML/HTML document will yield a
correspondingly ill-formed data structure. If your document is only
locally well-formed, you can use this library to find and process the
well-formed part of it. The BeautifulSoup class has heuristics for
obtaining a sensible parse tree in the face of common HTML errors.
Beautiful Soup has no external dependencies. It works with Python 2.2
and up.
Beautiful Soup defines classes for four different parsing strategies:
* BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
language that kind of looks like XML.
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
or invalid.
* ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML
that trips up BeautifulSoup.
* BeautifulSOAP, for making it easier to parse XML documents that use
lots of subelements containing a single string, where you'd prefer
they put that string into an attribute (such as SOAP messages).
You can subclass BeautifulStoneSoup or BeautifulSoup to create a
parsing strategy specific to an XML schema or a particular bizarre
HTML document. Typically your subclass would just override
SELF_CLOSING_TAGS and/or NESTABLE_TAGS.
""" #"
from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "2.1.1"
__date__ = "$Date: 2004/10/18 00:14:20 $"
__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson"
__license__ = "PSF"
from _sgmllib_copy import SGMLParser, SGMLParseError
import types
import re
import _sgmllib_copy as sgmllib
class NullType(object):
    """Similar to NoneType with a corresponding singleton instance
    'Null' that, unlike None, accepts any message and returns itself.

    Examples:
    >>> Null("send", "a", "message")("and one more",
    ...      "and what you get still") is Null
    True
    """

    def __new__(cls):
        # Constructing the type always hands back the one shared instance.
        return Null

    def __call__(self, *args, **kwargs):
        return Null

##    def __getstate__(self, *args): return Null

    def __getattr__(self, attr):
        return Null

    def __getitem__(self, item):
        return Null

    def __setattr__(self, attr, value):
        # Writes are silently discarded.
        pass

    def __setitem__(self, item, value):
        # Writes are silently discarded.
        pass

    def __len__(self):
        return 0

    # FIXME: is this a python bug? otherwise ``for x in Null: pass``
    #        never terminates...
    def __iter__(self):
        return iter([])

    def __contains__(self, item):
        return False

    def __repr__(self):
        return "Null"


# Created through object.__new__ because NullType.__new__ itself returns
# this very name.
Null = object.__new__(NullType)
class PageElement:
    """Navigation behaviour shared by every node of the parse tree
    (both tags and pieces of text)."""

    def setup(self, parent=Null, previous=Null):
        """Record this element's parent and predecessor, and link it to
        the parent's current last child as a sibling."""
        self.parent = parent
        self.previous = previous
        self.next = Null
        self.previousSibling = Null
        self.nextSibling = Null
        if self.parent and self.parent.contents:
            last_child = self.parent.contents[-1]
            self.previousSibling = last_child
            last_child.nextSibling = self

    def findNext(self, name=None, attrs={}, text=None):
        """First matching item that appears after this element in the
        document, or Null."""
        return self._first(self.fetchNext, name, attrs, text)
    firstNext = findNext

    def fetchNext(self, name=None, attrs={}, text=None, limit=None):
        """All matching items that appear after this element in the
        document."""
        return self._fetch(name, attrs, text, limit, self.nextGenerator)

    def findNextSibling(self, name=None, attrs={}, text=None):
        """Closest following sibling that matches, or Null."""
        return self._first(self.fetchNextSiblings, name, attrs, text)
    firstNextSibling = findNextSibling

    def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None):
        """All matching siblings that appear after this element."""
        return self._fetch(name, attrs, text, limit,
                           self.nextSiblingGenerator)

    def findPrevious(self, name=None, attrs={}, text=None):
        """First matching item that appears before this element in the
        document, or Null."""
        return self._first(self.fetchPrevious, name, attrs, text)

    def fetchPrevious(self, name=None, attrs={}, text=None, limit=None):
        """All matching items that appear before this element in the
        document."""
        return self._fetch(name, attrs, text, limit, self.previousGenerator)
    firstPrevious = findPrevious

    def findPreviousSibling(self, name=None, attrs={}, text=None):
        """Closest preceding sibling that matches, or Null."""
        return self._first(self.fetchPreviousSiblings, name, attrs, text)
    firstPreviousSibling = findPreviousSibling

    def fetchPreviousSiblings(self, name=None, attrs={}, text=None,
                              limit=None):
        """All matching siblings that appear before this element."""
        return self._fetch(name, attrs, text, limit,
                           self.previousSiblingGenerator)

    def findParent(self, name=None, attrs={}):
        """Closest enclosing element that matches, or Null."""
        matches = self.fetchParents(name, attrs, 1)
        if matches:
            return matches[0]
        return Null
    firstParent = findParent

    def fetchParents(self, name=None, attrs={}, limit=None):
        """All enclosing elements that match, nearest first."""
        return self._fetch(name, attrs, None, limit, self.parentGenerator)

    #These methods do the real heavy lifting.

    def _first(self, method, name, attrs, text):
        """Run `method` with a limit of one and unwrap the single
        result; Null when nothing was found."""
        matches = method(name, attrs, text, 1)
        if matches:
            return matches[0]
        return Null

    def _fetch(self, name, attrs, text, limit, generator):
        "Iterates over a generator looking for things that match."
        # A non-mapping 'attrs' is shorthand for a match on 'class'.
        if not hasattr(attrs, 'items'):
            attrs = {'class' : attrs}

        results = []
        for candidate in generator():
            found = None
            if isinstance(candidate, Tag):
                # Tags are only considered when no text criterion is given.
                if not text:
                    if not name or self._matches(candidate, name):
                        for attr, matchAgainst in attrs.items():
                            if not self._matches(candidate.get(attr),
                                                 matchAgainst):
                                break
                        else:
                            found = candidate
            elif text:
                if self._matches(candidate, text):
                    found = candidate
            if found:
                results.append(found)
                if limit and len(results) >= limit:
                    break
        return results

    #Generators that can be used to navigate starting from both
    #NavigableTexts and Tags.

    def _follow(self, link):
        """Yield the chain reached by repeatedly reading attribute
        `link`, including the final false value that ends the chain."""
        node = self
        while node:
            node = getattr(node, link)
            yield node

    def nextGenerator(self):
        return self._follow('next')

    def nextSiblingGenerator(self):
        return self._follow('nextSibling')

    def previousGenerator(self):
        return self._follow('previous')

    def previousSiblingGenerator(self):
        return self._follow('previousSibling')

    def parentGenerator(self):
        return self._follow('parent')

    def _matches(self, chunk, howToMatch):
        """Decide whether `chunk` satisfies the criterion `howToMatch`,
        which may be a callable, a regexp object, a list, a map or a
        plain string."""
        #print 'looking for %s in %s' % (howToMatch, chunk)
        #
        # If given a list of items, return true if the list contains a
        # text element that matches.
        if isList(chunk) and not isinstance(chunk, Tag):
            for item in chunk:
                if isinstance(item, NavigableText) \
                       and self._matches(item, howToMatch):
                    return True
            return False
        if callable(howToMatch):
            return howToMatch(chunk)
        if isinstance(chunk, Tag):
            #Custom match methods take the tag as an argument, but all other
            #ways of matching match the tag name as a string
            chunk = chunk.name
        #Now we know that chunk is a string
        if not isinstance(chunk, basestring):
            chunk = str(chunk)
        if hasattr(howToMatch, 'match'):
            # It's a regexp object.
            return howToMatch.search(chunk)
        if isList(howToMatch):
            return chunk in howToMatch
        if hasattr(howToMatch, 'items'):
            return howToMatch.has_key(chunk)
        #It's just a string
        return str(howToMatch) == chunk
class NavigableText(PageElement):
    """A piece of text that takes part in tree navigation."""

    def __getattr__(self, attr):
        "For backwards compatibility, text.string gives you text"
        if attr != 'string':
            message = "'%s' object has no attribute '%s'" % (
                self.__class__.__name__, attr)
            raise AttributeError(message)
        return self
class NavigableString(str, NavigableText):
    """A byte string that is also a navigable text node."""
class NavigableUnicodeString(unicode, NavigableText):
    """A unicode string that is also a navigable text node."""
class Tag(PageElement):
    """Represents a found HTML tag with its attributes and contents."""

    def __init__(self, name, attrs=None, parent=Null, previous=Null):
        """Basic constructor.

        `attrs` is a list of (key, value) pairs; a fresh empty list is
        used when it is omitted.
        """
        self.name = name
        if attrs is None:
            attrs = []
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False

    def get(self, key, default=None):
        """Returns the value of the 'key' attribute for the tag, or
        the value given for 'default' if it doesn't have that
        attribute."""
        return self._getAttrMap().get(key, default)

    def __getitem__(self, key):
        """tag[key] returns the value of the 'key' attribute for the tag,
        and throws an exception if it's not there."""
        return self._getAttrMap()[key]

    def __iter__(self):
        "Iterating over a tag iterates over its contents."
        return iter(self.contents)

    def __len__(self):
        "The length of a tag is the length of its list of contents."
        return len(self.contents)

    def __contains__(self, x):
        return x in self.contents

    def __nonzero__(self):
        "A tag is non-None even if it has no contents."
        return True

    def __setitem__(self, key, value):
        """Setting tag[key] sets the value of the 'key' attribute for the
        tag."""
        self._getAttrMap()[key] = value
        found = False
        # Rewrite every existing occurrence; append only when there is none.
        for i in range(0, len(self.attrs)):
            if self.attrs[i][0] == key:
                self.attrs[i] = (key, value)
                found = True
        if not found:
            self.attrs.append((key, value))

    def __delitem__(self, key):
        "Deleting tag[key] deletes all 'key' attributes for the tag."
        # Bad HTML can define the same attribute multiple times, so every
        # occurrence is dropped.  The list is rebuilt in place instead of
        # calling remove() while iterating, which skipped the element
        # following each removed one.
        self.attrs[:] = [item for item in self.attrs if item[0] != key]
        self._getAttrMap().pop(key, None)

    def __call__(self, *args, **kwargs):
        """Calling a tag like a function is the same as calling its
        fetch() method. Eg. tag('a') returns a list of all the A tags
        found within this tag."""
        return self.fetch(*args, **kwargs)

    def __getattr__(self, tag):
        """tag.fooTag and tag.foo both return the first 'foo' tag found
        within this tag.

        Other names that start with a double underscore raise
        AttributeError, so that protocol lookups for special methods see
        a missing attribute rather than None.
        """
        if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
            return self.first(tag[:-3])
        elif tag.find('__') != 0:
            return self.first(tag)
        raise AttributeError("'%s' object has no attribute '%s'"
                             % (self.__class__.__name__, tag))

    def __eq__(self, other):
        """Returns true iff this tag has the same name, the same attributes,
        and the same contents (recursively) as the given tag.

        NOTE: right now this will return false if two tags have the
        same attributes in a different order. Should this be fixed?"""
        if not hasattr(other, 'name') or not hasattr(other, 'attrs') \
               or not hasattr(other, 'contents'):
            return False
        if self.name != other.name or self.attrs != other.attrs \
               or len(self) != len(other):
            return False
        for i in range(0, len(self.contents)):
            if self.contents[i] != other.contents[i]:
                return False
        return True

    def __ne__(self, other):
        """Returns true iff this tag is not identical to the other tag,
        as defined in __eq__."""
        return not self == other

    def __repr__(self):
        """Renders this tag as a string."""
        return str(self)

    def __unicode__(self):
        return self.__str__(1)

    def __str__(self, needUnicode=None, showStructureIndent=None):
        """Returns a string or Unicode representation of this tag and
        its contents.

        `showStructureIndent`, when not None, is the indentation depth
        used for pretty-printing.

        NOTE: since Python's HTML parser consumes whitespace, this
        method is not certain to reproduce the whitespace present in
        the original string."""
        attrs = []
        if self.attrs:
            for key, val in self.attrs:
                attrs.append('%s="%s"' % (key, val))
        close = ''
        closeTag = ''
        if self.isSelfClosing():
            close = ' /'
        else:
            closeTag = '</%s>' % self.name
        indentIncrement = None
        if showStructureIndent is not None:
            indentIncrement = showStructureIndent
            # Hidden tags do not add a level of indentation.
            if not self.hidden:
                indentIncrement += 1
        contents = self.renderContents(indentIncrement,
                                       needUnicode=needUnicode)
        if showStructureIndent:
            space = '\n%s' % (' ' * showStructureIndent)
        if self.hidden:
            s = contents
        else:
            s = []
            attributeString = ''
            if attrs:
                attributeString = ' ' + ' '.join(attrs)
            if showStructureIndent:
                s.append(space)
            s.append('<%s%s%s>' % (self.name, attributeString, close))
            s.append(contents)
            # Same truthiness test as for the opening tag: 'space' only
            # exists when showStructureIndent is true.
            if closeTag and showStructureIndent:
                s.append(space)
            s.append(closeTag)
            s = ''.join(s)
        isUnicode = type(s) == types.UnicodeType
        if needUnicode and not isUnicode:
            s = unicode(s)
        elif isUnicode and needUnicode==False:
            s = str(s)
        return s

    def prettify(self, needUnicode=None):
        """Render this tag with structural indentation switched on."""
        return self.__str__(needUnicode, showStructureIndent=True)

    def renderContents(self, showStructureIndent=None, needUnicode=None):
        """Renders the contents of this tag as a (possibly Unicode)
        string."""
        s=[]
        for c in self:
            text = None
            if isinstance(c, NavigableUnicodeString) \
                   or type(c) == types.UnicodeType:
                text = unicode(c)
            elif isinstance(c, Tag):
                s.append(c.__str__(needUnicode, showStructureIndent))
            elif needUnicode:
                text = unicode(c)
            else:
                text = str(c)
            if text:
                if showStructureIndent is not None:
                    # When pretty-printing, drop one trailing newline.
                    if text[-1] == '\n':
                        text = text[:-1]
                s.append(text)
        return ''.join(s)

    #Soup methods

    def firstText(self, text, recursive=True):
        """Convenience method to retrieve the first piece of text matching the
        given criteria. 'text' can be a string, a regular expression object,
        a callable that takes a string and returns whether or not the
        string 'matches', etc."""
        return self.first(recursive=recursive, text=text)

    def fetchText(self, text, recursive=True, limit=None):
        """Convenience method to retrieve all pieces of text matching the
        given criteria. 'text' can be a string, a regular expression object,
        a callable that takes a string and returns whether or not the
        string 'matches', etc."""
        return self.fetch(recursive=recursive, text=text, limit=limit)

    def first(self, name=None, attrs={}, recursive=True, text=None):
        """Return only the first child of this
        Tag matching the given criteria."""
        r = Null
        l = self.fetch(name, attrs, recursive, text, 1)
        if l:
            r = l[0]
        return r
    findChild = first

    def fetch(self, name=None, attrs={}, recursive=True, text=None,
              limit=None):
        """Extracts a list of Tag objects that match the given
        criteria.  You can specify the name of the Tag and any
        attributes you want the Tag to have.

        The value of a key-value pair in the 'attrs' map can be a
        string, a list of strings, a regular expression object, or a
        callable that takes a string and returns whether or not the
        string matches for some custom definition of 'matches'. The
        same is true of the tag name."""
        generator = self.recursiveChildGenerator
        if not recursive:
            generator = self.childGenerator
        return self._fetch(name, attrs, text, limit, generator)
    fetchChildren = fetch

    #Utility methods

    def isSelfClosing(self):
        """Returns true iff this is a self-closing tag as defined in the HTML
        standard.

        TODO: This is specific to BeautifulSoup and its subclasses, but it's
        used by __str__"""
        return self.name in BeautifulSoup.SELF_CLOSING_TAGS

    def append(self, tag):
        """Appends the given tag to the contents of this tag."""
        self.contents.append(tag)

    #Private methods

    def _getAttrMap(self):
        """Initializes a map representation of this tag's attributes,
        if not already initialized."""
        # Look in the instance dictionary directly.  Going through
        # getattr() would fall into __getattr__, which searches the
        # descendants for a tag called 'attrMap'.
        attrMap = self.__dict__.get('attrMap')
        if not attrMap:
            attrMap = {}
            for (key, value) in self.attrs:
                attrMap[key] = value
            self.attrMap = attrMap
        return attrMap

    #Generator methods

    def childGenerator(self):
        """Yield the direct children of this tag, in order."""
        for i in range(0, len(self.contents)):
            yield self.contents[i]

    def recursiveChildGenerator(self):
        """Yield every descendant of this tag in document order."""
        # Explicit stack of (tag, index to resume from) pairs.
        stack = [(self, 0)]
        while stack:
            tag, start = stack.pop()
            if isinstance(tag, Tag):
                for i in range(start, len(tag.contents)):
                    a = tag.contents[i]
                    yield a
                    if isinstance(a, Tag) and tag.contents:
                        # Remember where to resume in this tag, then
                        # descend into the child first.
                        if i < len(tag.contents) - 1:
                            stack.append((tag, i+1))
                        stack.append((a, 0))
                        break
def isList(l):
    """Tell whether `l` is listlike: true for anything that exposes
    __iter__, and for plain lists and tuples.  Works with all 2.x
    versions of Python."""
    if hasattr(l, '__iter__'):
        return True
    return type(l) in (types.ListType, types.TupleType)
def buildTagMap(default, *args):
    """Merge any mix of maps, lists and scalars into one dictionary.

    Maps are merged as they are; each list item and each scalar is
    mapped to `default`.  Used to build the SELF_CLOSING_TAGS and
    NESTABLE_TAGS maps out of lists and partial maps."""
    merged = {}
    for part in args:
        if hasattr(part, 'items'):
            #It's a map. Merge it.
            for key, value in part.items():
                merged[key] = value
            continue
        if isList(part):
            #It's a list. Map each item to the default.
            for key in part:
                merged[key] = default
            continue
        #It's a scalar. Map it to the default.
        merged[part] = default
    return merged
class BeautifulStoneSoup(Tag, SGMLParser):
    """This class contains the basic parser and fetch code. It defines
    a parser that knows nothing about tag behavior except for the
    following:

      You can't close a tag without closing all the tags it encloses.
      That is, "<foo><bar></foo>" actually means
      "<foo><bar></bar></foo>".

    [Another possible explanation is "<foo><bar /></foo>", but since
    this class defines no SELF_CLOSING_TAGS, it will never use that
    explanation.]

    This class is useful for parsing XML or made-up markup languages,
    or when BeautifulSoup makes an assumption counter to what you were
    expecting."""

    SELF_CLOSING_TAGS = {}
    NESTABLE_TAGS = {}
    RESET_NESTING_TAGS = {}
    QUOTE_TAGS = {}

    #As a public service we will by default silently replace MS smart quotes
    #and similar characters with their HTML or ASCII equivalents.
    MS_CHARS = { '\x80' : '&euro;',
                 '\x81' : ' ',
                 '\x82' : '&sbquo;',
                 '\x83' : '&fnof;',
                 '\x84' : '&bdquo;',
                 '\x85' : '&hellip;',
                 '\x86' : '&dagger;',
                 '\x87' : '&Dagger;',
                 '\x88' : '&caret;',
                 '\x89' : '%',
                 '\x8A' : '&Scaron;',
                 '\x8B' : '&lt;',
                 '\x8C' : '&OElig;',
                 '\x8D' : '?',
                 '\x8E' : 'Z',
                 '\x8F' : '?',
                 '\x90' : '?',
                 '\x91' : '&lsquo;',
                 '\x92' : '&rsquo;',
                 '\x93' : '&ldquo;',
                 '\x94' : '&rdquo;',
                 '\x95' : '&bull;',
                 '\x96' : '&ndash;',
                 '\x97' : '&mdash;',
                 '\x98' : '&tilde;',
                 '\x99' : '&trade;',
                 '\x9a' : '&scaron;',
                 '\x9b' : '&gt;',
                 '\x9c' : '&oelig;',
                 '\x9d' : '?',
                 '\x9e' : 'z',
                 '\x9f' : '&Yuml;',}

    # (compiled regexp, replacement function) pairs applied to the input
    # by feed(): put a space before '/>', remove whitespace after '<!',
    # and substitute the MS_CHARS characters.
    PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
                       lambda x: x.group(1) + ' />'),
                      (re.compile(r'<!\s+([^<>]*)>'),
                       lambda x: '<!' + x.group(1) + '>'),
                      (re.compile("([\x80-\x9f])"),
                       lambda x: BeautifulStoneSoup.MS_CHARS.get(x.group(1)))
                      ]

    ROOT_TAG_NAME = '[document]'

    def __init__(self, text=None, avoidParserProblems=True,
                 initialTextIsEverything=True):
        """Initialize this as the 'root tag' and feed in any text to
        the parser.

        NOTE about avoidParserProblems: sgmllib will process most bad
        HTML, and BeautifulSoup has tricks for dealing with some HTML
        that kills sgmllib, but Beautiful Soup can nonetheless choke
        or lose data if your data uses self-closing tags or
        declarations incorrectly. By default, Beautiful Soup sanitizes
        its input to avoid the vast majority of these problems. The
        problems are relatively rare, even in bad HTML, so feel free
        to pass in False to avoidParserProblems if they don't apply to
        you, and you'll get better performance. The only reason I have
        this turned on by default is so I don't get so many tech
        support questions.

        The two most common instances of invalid HTML that will choke
        sgmllib are fixed by the default parser massage techniques:

         <br/> (No space between name of closing tag and tag close)
         <! --Comment--> (Extraneous whitespace in declaration)

        You can pass in a custom list of (RE object, replace method)
        tuples to get Beautiful Soup to scrub your input the way you
        want."""
        Tag.__init__(self, self.ROOT_TAG_NAME)
        # Any true value that is not itself a list selects the default
        # massage rules.
        if avoidParserProblems \
           and not isList(avoidParserProblems):
            avoidParserProblems = self.PARSER_MASSAGE
        self.avoidParserProblems = avoidParserProblems
        SGMLParser.__init__(self)
        self.quoteStack = []
        self.hidden = 1
        self.reset()
        if hasattr(text, 'read'):
            #It's a file-type object.
            text = text.read()
        if text:
            self.feed(text)
        if initialTextIsEverything:
            self.done()

    def __getattr__(self, methodName):
        """This method routes method call requests to either the SGMLParser
        superclass or the Tag superclass, depending on the method name."""
        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
               or methodName.find('do_') == 0:
            return SGMLParser.__getattr__(self, methodName)
        elif methodName.find('__') != 0:
            return Tag.__getattr__(self, methodName)
        else:
            raise AttributeError

    def feed(self, text):
        """Apply the configured massage rules, if any, then hand the
        text to the SGML parser."""
        if self.avoidParserProblems:
            for fix, m in self.avoidParserProblems:
                text = fix.sub(m, text)
        SGMLParser.feed(self, text)

    def done(self):
        """Called when you're done parsing, so that the unclosed tags can be
        correctly processed."""
        self.endData() #NEW
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()

    def reset(self):
        """Reset the parser and start again with this object as the only
        tag on the stack."""
        SGMLParser.reset(self)
        self.currentData = []
        self.currentTag = None
        self.tagStack = []
        self.pushTag(self)

    def popTag(self):
        """Pop the innermost open tag and return the new current tag."""
        self.tagStack.pop()
        # Tags with just one string-owning child get the child as a
        # 'string' property, so that soup.tag.string is shorthand for
        # soup.tag.contents[0]
        if len(self.currentTag.contents) == 1 and \
           isinstance(self.currentTag.contents[0], NavigableText):
            self.currentTag.string = self.currentTag.contents[0]

        #print "Pop", tag.name
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag

    def pushTag(self, tag):
        """Append `tag` to the current tag (if there is one) and make it
        the current tag."""
        #print "Push", tag.name
        if self.currentTag:
            self.currentTag.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]

    def endData(self):
        """Turn the text collected so far into a text node under the
        current tag, then clear the buffer."""
        currentData = ''.join(self.currentData)
        if currentData:
            # Whitespace-only runs collapse to a single newline or space.
            if not currentData.strip():
                if '\n' in currentData:
                    currentData = '\n'
                else:
                    currentData = ' '
            c = NavigableString
            if type(currentData) == types.UnicodeType:
                c = NavigableUnicodeString
            o = c(currentData)
            o.setup(self.currentTag, self.previous)
            if self.previous:
                self.previous.next = o
            self.previous = o
            self.currentTag.contents.append(o)
        self.currentData = []

    def _popToTag(self, name, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
        stack up to but *not* including the most recent instance of
        the given tag."""
        if name == self.ROOT_TAG_NAME:
            return

        numPops = 0
        mostRecentTag = None
        # Index 0 is the root tag and is never a candidate.
        for i in range(len(self.tagStack)-1, 0, -1):
            if name == self.tagStack[i].name:
                numPops = len(self.tagStack)-i
                break
        if not inclusivePop:
            numPops = numPops - 1

        for i in range(0, numPops):
            mostRecentTag = self.popTag()
        return mostRecentTag

    def _smartPop(self, name):
        """We need to pop up to the previous tag of this type, unless
        one of this tag's nesting reset triggers comes between this
        tag and the previous tag of this type, OR unless this tag is a
        generic nesting trigger and another generic nesting trigger
        comes between this tag and the previous tag of this type.

        Examples:
         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.

         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
        """
        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
        isNestable = nestingResetTriggers is not None
        isResetNesting = name in self.RESET_NESTING_TAGS
        popTo = None
        inclusive = True
        for i in range(len(self.tagStack)-1, 0, -1):
            p = self.tagStack[i]
            if (not p or p.name == name) and not isNestable:
                #Non-nestable tags get popped to the top or to their
                #last occurance.
                popTo = name
                break
            if (nestingResetTriggers is not None
                and p.name in nestingResetTriggers) \
                or (nestingResetTriggers is None and isResetNesting
                    and p.name in self.RESET_NESTING_TAGS):
                #If we encounter one of the nesting reset triggers
                #peculiar to this tag, or we encounter another tag
                #that causes nesting to reset, pop up to but not
                #including that tag.
                popTo = p.name
                inclusive = False
                break
        if popTo:
            self._popToTag(popTo, inclusive)

    def unknown_starttag(self, name, attrs, selfClosing=0):
        """Handle a start tag: close whatever it implicitly closes and
        push a new Tag, or pass it through as text inside a quote tag."""
        #print "Start tag %s" % name
        if self.quoteStack:
            #This is not a real tag.
            #print "<%s> is not real!" % name
            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
            self.handle_data('<%s%s>' % (name, attrs))
            return
        self.endData()
        if not name in self.SELF_CLOSING_TAGS and not selfClosing:
            self._smartPop(name)
        tag = Tag(name, attrs, self.currentTag, self.previous)
        if self.previous:
            self.previous.next = tag
        self.previous = tag
        self.pushTag(tag)
        if selfClosing or name in self.SELF_CLOSING_TAGS:
            self.popTag()
        if name in self.QUOTE_TAGS:
            #print "Beginning quote (%s)" % name
            self.quoteStack.append(name)
            self.literal = 1

    def unknown_endtag(self, name):
        """Handle an end tag: pop back to the matching open tag, or pass
        it through as text when it does not end the current quote tag."""
        if self.quoteStack and self.quoteStack[-1] != name:
            #This is not a real end tag.
            #print "</%s> is not real!" % name
            self.handle_data('</%s>' % name)
            return
        self.endData()
        self._popToTag(name)
        if self.quoteStack and self.quoteStack[-1] == name:
            self.quoteStack.pop()
            self.literal = (len(self.quoteStack) > 0)

    def handle_data(self, data):
        """Buffer a chunk of text until endData() is called."""
        self.currentData.append(data)

    def handle_pi(self, text):
        "Propagate processing instructions right through."
        self.handle_data("<?%s>" % text)

    def handle_comment(self, text):
        "Propagate comments right through."
        self.handle_data("<!--%s-->" % text)

    def handle_charref(self, ref):
        "Propagate char refs right through."
        self.handle_data('&#%s;' % ref)

    def handle_entityref(self, ref):
        "Propagate entity refs right through."
        self.handle_data('&%s;' % ref)

    def handle_decl(self, data):
        "Propagate DOCTYPEs and the like right through."
        self.handle_data('<!%s>' % data)

    def parse_declaration(self, i):
        """Treat a bogus SGML declaration as raw data. Treat a CDATA
        declaration as regular data.

        Returns the index in the raw data at which parsing resumes."""
        j = None
        if self.rawdata[i:i+9] == '<![CDATA[':
            k = self.rawdata.find(']]>', i)
            # An unterminated CDATA section runs to the end of the input.
            if k == -1:
                k = len(self.rawdata)
            self.handle_data(self.rawdata[i+9:k])
            j = k+3
        else:
            try:
                j = SGMLParser.parse_declaration(self, i)
            except SGMLParseError:
                toHandle = self.rawdata[i:]
                self.handle_data(toHandle)
                j = i + len(toHandle)
        return j
class BeautifulSoup(BeautifulStoneSoup):
    """This parser knows the following facts about HTML:

    * Some tags have no closing tag and should be interpreted as being
      closed as soon as they are encountered.

    * The text inside some tags (ie. 'script') may contain tags which
      are not really part of the document and which should be parsed
      as text, not tags. If you want to parse the text as tags, you can
      always fetch it and parse it explicitly.

    * Tag nesting rules:

      Most tags can't be nested at all. For instance, the occurance of
      a <p> tag should implicitly close the previous <p> tag.

       <p>Para1<p>Para2
        should be transformed into:
       <p>Para1</p><p>Para2

      Some tags can be nested arbitrarily. For instance, the occurance
      of a <blockquote> tag should _not_ implicitly close the previous
      <blockquote> tag.

       Alice said: <blockquote>Bob said: <blockquote>Blah
        should NOT be transformed into:
       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah

      Some tags can be nested, but the nesting is reset by the
      interposition of other tags. For instance, a <tr> tag should
      implicitly close the previous <tr> tag within the same <table>,
      but not close a <tr> tag in another table.

       <table><tr>Blah<tr>Blah
        should be transformed into:
       <table><tr>Blah</tr><tr>Blah
        but,
       <tr>Blah<table><tr>Blah
        should NOT be transformed into
       <tr>Blah<table></tr><tr>Blah

    Differing assumptions about tag nesting rules are a major source
    of problems with the BeautifulSoup class. If BeautifulSoup is not
    treating as nestable a tag your page author treats as nestable,
    try ICantBelieveItsBeautifulSoup before writing your own
    subclass."""

    # Tags that are closed as soon as they are opened.
    SELF_CLOSING_TAGS = buildTagMap(
        None,
        ['br' , 'hr', 'input', 'img', 'meta', 'spacer', 'link', 'frame',
         'base'])

    # Tags whose content is passed through as text.
    QUOTE_TAGS = {'script': None}

    #According to the HTML standard, each of these inline tags can
    #contain another tag of the same type. Furthermore, it's common
    #to actually use these tags this way.
    NESTABLE_INLINE_TAGS = [
        'span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', 'center']

    #According to the HTML standard, these block tags can contain
    #another tag of the same type. Furthermore, it's common
    #to actually use these tags this way.
    NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']

    #Lists can contain other lists, but there are restrictions.
    NESTABLE_LIST_TAGS = {
        'ol' : [],
        'ul' : [],
        'li' : ['ul', 'ol'],
        'dl' : [],
        'dd' : ['dl'],
        'dt' : ['dl'],
    }

    #Tables can contain other tables, but there are restrictions.
    NESTABLE_TABLE_TAGS = {
        'table' : [],
        'tr' : ['table', 'tbody', 'tfoot', 'thead'],
        'td' : ['tr'],
        'th' : ['tr'],
    }

    NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']

    #If one of these tags is encountered, all tags up to the next tag of
    #this type are popped.
    RESET_NESTING_TAGS = buildTagMap(
        None,
        NESTABLE_BLOCK_TAGS,
        'noscript',
        NON_NESTABLE_BLOCK_TAGS,
        NESTABLE_LIST_TAGS,
        NESTABLE_TABLE_TAGS)

    # Every nestable tag mapped to its nesting reset triggers; inline and
    # block tags get the empty-list default.
    NESTABLE_TAGS = buildTagMap(
        [],
        NESTABLE_INLINE_TAGS,
        NESTABLE_BLOCK_TAGS,
        NESTABLE_LIST_TAGS,
        NESTABLE_TABLE_TAGS)
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
    """The BeautifulSoup class is oriented towards skipping over
    common HTML errors like unclosed tags. However, sometimes it makes
    errors of its own. For instance, consider this fragment:

     <b>Foo<b>Bar</b></b>

    This is perfectly valid (if bizarre) HTML. However, the
    BeautifulSoup class will implicitly close the first b tag when it
    encounters the second 'b'. It will think the author wrote
    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
    there's no real-world reason to bold something that's already
    bold. When it encounters '</b></b>' it will close two more 'b'
    tags, for a grand total of three tags closed instead of two. This
    can throw off the rest of your document structure. The same is
    true of a number of other tags, listed below.

    It's much more common for someone to forget to close (eg.) a 'b'
    tag than to actually use nested 'b' tags, and the BeautifulSoup
    class handles the common case. This class handles the
    not-so-common case: where you can't believe someone wrote what
    they did, but it's valid HTML and BeautifulSoup screwed up by
    assuming it wouldn't be.

    If this doesn't do what you need, try subclassing this class or
    BeautifulSoup, and providing your own list of NESTABLE_TAGS."""

    # Inline tags that this class additionally treats as nestable.
    # Each name is listed once; the tag map built below is keyed by
    # name, so repeating an entry would have no effect.
    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
      'cite', 'code', 'dfn', 'kbd', 'samp', 'var', 'b']

    # Block tags that this class additionally treats as nestable.
    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']

    # Start from the parent's nestable tags and add the extra ones above.
    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
class BeautifulSOAP(BeautifulStoneSoup):
    """This class will push a tag with only a single string child into
    the tag's parent as an attribute. The attribute's name is the tag
    name, and the value is the string child. An example should give
    the flavor of the change:

    <foo><bar>baz</bar></foo>
    =>
    <foo bar="baz"><bar>baz</bar></foo>

    You can then access fooTag['bar'] instead of fooTag.barTag.string.

    This is, of course, useful for scraping structures that tend to
    use subelements instead of attributes, such as SOAP messages. Note
    that it modifies its input, so don't print the modified version
    out.

    I'm not sure how many people really want to use this class; let me
    know if you do. Mainly I like the name."""

    def popTag(self):
        """Pop the current tag, first mirroring it onto its parent.

        If the tag on top of the stack holds exactly one NavigableText
        child and the parent has no attribute of the same name yet, the
        text is stored on the parent under the tag's name. The actual
        pop is then delegated to BeautifulStoneSoup.popTag.
        """
        if len(self.tagStack) > 1:
            tag = self.tagStack[-1]
            parent = self.tagStack[-2]
            # Called before parent.attrMap is consulted below; presumably
            # this is what populates attrMap -- TODO confirm in Tag.
            parent._getAttrMap()
            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
                isinstance(tag.contents[0], NavigableText) and
                tag.name not in parent.attrMap):
                parent[tag.name] = tag.contents[0]
        BeautifulStoneSoup.popTag(self)
#Enterprise class names! It has come to our attention that some people
#think the names of the Beautiful Soup parser classes are too silly
#and "unprofessional" for use in enterprise screen-scraping. We feel
#your pain! For such-minded folk, the Beautiful Soup Consortium And
#All-Night Kosher Bakery recommends renaming this file to
#"RobustParser.py" (or, in cases of extreme enterprisitude,
#"RobustParserBeanInterface.class") and using the following
#enterprise-friendly class aliases:
class RobustXMLParser(BeautifulStoneSoup):
    """Enterprise-friendly alias for BeautifulStoneSoup; adds nothing."""
    pass
class RobustHTMLParser(BeautifulSoup):
    """Enterprise-friendly alias for BeautifulSoup; adds nothing."""
    pass
class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
    """Enterprise-friendly alias for ICantBelieveItsBeautifulSoup."""
    pass
class SimplifyingSOAPParser(BeautifulSOAP):
    """Enterprise-friendly alias for BeautifulSOAP; adds nothing."""
    pass
###
#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
    # Script entry point: read the whole document from standard input,
    # parse it and print the prettified result to standard output.
    # NOTE(review): the parser used here is BeautifulStoneSoup, not the
    # HTML-specific BeautifulSoup class.
    import sys
    soup = BeautifulStoneSoup(sys.stdin.read())
    print soup.prettify()
================================================
FILE: BruteXSS/mechanize/_clientcookie.py
================================================
"""HTTP cookie handling for web clients.
This module originally developed from my port of Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.
Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.
CookieJar____
/ \ \
FileCookieJar \ \
/ | \ \ \
MozillaCookieJar | LWPCookieJar \ \
| | \
| ---MSIEBase | \
| / | | \
| / MSIEDBCookieJar BSDDBCookieJar
|/
MSIECookieJar
Comments to John J Lee .
Copyright 2002-2006 John J Lee
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import sys, re, copy, time, urllib, types, logging
try:
import threading
_threading = threading; del threading
except ImportError:
import dummy_threading
_threading = dummy_threading; del dummy_threading
# Message text for the case where no filename is available; presumably
# used by the file-backed cookie jars -- its users are outside this view.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
# Port returned by request_port() when the request host carries no
# explicit ":port" suffix. Kept as a string, like all ports in this module.
DEFAULT_HTTP_PORT = "80"
from _headersutil import split_header_words, parse_ns_headers
from _util import isstringlike
import _rfc3986
debug = logging.getLogger("mechanize.cookies").debug
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled unless it may be swallowed.

    Must be called from inside an ``except`` block: it relies on the
    bare ``raise`` statement and on sys.exc_info().

    unmasked: tuple of extra exception classes that must always be
    re-raised, in addition to KeyboardInterrupt, SystemExit and
    MemoryError.

    If the exception is swallowed, its traceback is reported through
    warnings.warn instead.
    """
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.
    # This function re-raises some exceptions we don't want to trap.
    import mechanize, warnings
    # With bare-except handling switched off, nothing is ever swallowed.
    if not mechanize.USE_BARE_EXCEPT:
        raise
    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # swallowed an exception
    import traceback, StringIO
    # Capture the formatted traceback as text so it can go into the warning.
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
# Matches text that ends in ".<digits>", which this module takes as the
# sign of an IPv4 address rather than a host domain name.
IPV4_RE = re.compile(r"\.\d+$")


def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    return not (IPV4_RE.search(text) or
                text == "" or
                text[0] == "." or text[-1] == ".")


def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

    * their host name strings string-compare equal; or

    * A is a HDN string and has the form NB, where N is a non-empty
      name string, B has the form .B', and B' is a HDN string.  (So,
      x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB with N non-empty" means that A ends with B and is
    # strictly longer than it.  Merely finding B somewhere inside A is not
    # enough: "www.acme.com.evil.org" must not domain-match ".acme.com".
    has_form_nb = A.endswith(B) and len(A) > len(B)
    return (
        has_form_nb and
        B.startswith(".") and
        is_HDN(B[1:])
        )
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Used when accepting/blocking domains: anything that does not end
    in ".<digits>" counts as a domain name.

    """
    return IPV4_RE.search(text) is None
def user_domain_match(A, B):
    """Match domain A against user-supplied block/allow entry B.

    A and B may be host domain names or IP addresses.  The comparison
    is case-insensitive.

    """
    host = A.lower()
    entry = B.lower()
    if not liberal_is_HDN(host) or not liberal_is_HDN(entry):
        # at least one side looks like an IP address: only an exact
        # match counts
        return host == entry
    if entry.startswith("."):
        # dotted entries also cover more specific domains
        return host.endswith(entry)
    return host == entry
# Matches a trailing ":<digits>" port suffix on a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL; if the URL has no
    host component, the request's "Host" header is used instead (empty
    string if absent).  A trailing ":port" is removed.

    NOTE(review): earlier documentation said the returned value is
    lowercased, but no lowercasing happens here -- use request_host_lc
    for a lowercased result.

    """
    url = request.get_full_url()
    host = _rfc3986.urlsplit(url)[1]
    if host is None:
        host = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", host, 1)
def request_host_lc(request):
    """Return request_host(request) converted to lower case."""
    return request_host(request).lower()
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    The effective name is the request-host with ".local" appended when
    the request-host contains no dot and does not look like an IPv4
    address; otherwise both elements are the same string.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
def eff_request_host_lc(request):
    """Return eff_request_host(request) with both names lowercased."""
    host_pair = eff_request_host(request)
    request_host_name, effective_name = host_pair
    return request_host_name.lower(), effective_name.lower()
def effective_request_host(request):
    """Return the effective request-host, as defined by RFC 2965."""
    _req_host, erhn = eff_request_host(request)
    return erhn
def request_path(request):
    """Return path component of request-URI, as defined by RFC 2965.

    The path is escaped with escape_path and always starts with "/".

    """
    raw_path = _rfc3986.urlsplit(request.get_full_url())[2]
    escaped = escape_path(raw_path)
    if escaped.startswith("/"):
        return escaped
    return "/" + escaped
def request_port(request):
    """Return the port of the request's host, as a string.

    Everything after the first ":" in request.get_host() is taken as
    the port.  DEFAULT_HTTP_PORT is returned when there is no ":", and
    None when the port text is not an integer.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
def request_is_unverifiable(request):
    """Return whether the request is an unverifiable transaction.

    Prefers the request's is_unverifiable() method.  If calling it
    raises AttributeError, falls back to an ``unverifiable`` attribute;
    if that is missing too, the AttributeError propagates.

    """
    try:
        return request.is_unverifiable()
    except AttributeError:
        if not hasattr(request, "unverifiable"):
            raise
        return request.unverifiable
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: a percent sign followed by two hex digits (captured).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Return the matched %-escape with its hex digits uppercased."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # The character encoding behind existing %-escapes in a URL is
    # unknowable, but one has to be chosen to escape invalid path
    # characters.  UTF-8 is used, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # no dot at all, so H cannot have the form A.B
        return h
    # b is everything after the first dot (the part before it is A)
    b = h[first_dot+1:]
    if is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
def is_third_party(request):
    """Return True if the request goes to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    current_host = request_host_lc(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    return not domain_match(current_host, origin_reach)
# Compatibility shim: probe for the builtin all() and define a
# replacement only when the name lookup fails.
try:
    all
except NameError:
    # python 2.4
    def all(iterable):
        """Return True if every element of iterable is true (or it is empty)."""
        for x in iterable:
            if not x:
                return False
        return True
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (e.g. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (e.g. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    # Names of the instance attributes that __eq__ compares.
    _attrs = ("version", "name", "value",
              "port", "port_specified",
              "domain", "domain_specified", "domain_initial_dot",
              "path", "path_specified",
              "secure", "expires", "discard", "comment", "comment_url",
              "rfc2109", "_rest")

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the given cookie-attributes on the instance.

        version and expires are converted with int() unless they are
        None; domain is lowercased; rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.
        """

        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # Copy so that later set_nonstandard_attr calls do not modify the
        # caller's mapping.
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the non-standard cookie-attribute name is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute name, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute name to value."""
        self._rest[name] = value

    def nonstandard_attr_keys(self):
        """Return the names of all non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to the current time; a cookie without an expires
        value never counts as expired.
        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __eq__(self, other):
        return all(getattr(self, a) == getattr(other, a) for a in self._attrs)

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        # The template must consume both arguments; an empty format string
        # here would raise TypeError on every str(cookie).
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # _rest is reported under its constructor argument name, "rest".
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
class CookiePolicy:
    """Interface deciding which cookies are accepted and returned.

    A policy may also modify cookies.

    DefaultCookiePolicy is the subclass that implements the standard
    rules for Netscape and RFC 2965 cookies -- derive from that one if
    you want a customised policy.

    Besides set_ok and return_ok, an implementation must provide the
    attributes below, which say which protocols are in use and how.
    They may be read and assigned at any time, though whether changing
    them mid-flight makes sense from the protocol point of view is
    doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if it is not
       accepted into the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.
     The default is for this attribute to be None, which means treat 2109
     cookies as RFC 2965 cookies unless RFC 2965 handling is switched off
     (which it is, by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method -- the
        CookieJar class removes them itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This exists as an optimization: it avoids checking every cookie
        with a particular domain (which may involve reading many files).
        The default domain_return_ok and path_return_ok simply return
        True and so leave all the work to return_ok.

        When domain_return_ok returns true for a cookie domain,
        path_return_ok is called for the cookie path; otherwise neither
        path_return_ok nor return_ok is called for that cookie domain.
        When path_return_ok returns true, return_ok is called with the
        Cookie object itself for a full check; otherwise return_ok is
        never called for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain,
        not just for the *request* domain.  For example, the function
        might be called with both ".acme.com" and "www.acme.com" if the
        request domain is "www.acme.com".  The same goes for
        path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        See the docstring for domain_return_ok.

        """
        return True
class DefaultCookiePolicy(CookiePolicy):
"""Implements the standard rules for accepting and returning cookies.
Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
switched off by default.
The easiest way to provide your own policy is to override this class and
call its methods in your overriden implementations before adding your own
additional checks.
import mechanize
class MyCookiePolicy(mechanize.DefaultCookiePolicy):
def set_ok(self, cookie, request):
if not mechanize.DefaultCookiePolicy.set_ok(
self, cookie, request):
return False
if i_dont_want_to_store_this_cookie():
return False
return True
In addition to the features required to implement the CookiePolicy
interface, this class allows you to block and allow domains from setting
and receiving cookies. There are also some strictness switches that allow
you to tighten up the rather loose Netscape protocol rules a little bit (at
the cost of blocking some benign cookies).
A domain blacklist and whitelist is provided (both off by default). Only
domains not in the blacklist and present in the whitelist (if the whitelist
is active) participate in cookie setting and returning. Use the
blocked_domains constructor argument, and blocked_domains and
set_blocked_domains methods (and the corresponding argument and methods for
allowed_domains). If you set a whitelist, you can turn it off again by
setting it to None.
Domains in block or allow lists that do not start with a dot must
string-compare equal. For example, "acme.com" matches a blacklist entry of
"acme.com", but "www.acme.com" does not. Domains that do start with a dot
are matched by more specific domains too. For example, both "www.acme.com"
and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
not). IP addresses are an exception, and must match exactly. For example,
if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
blocked, but 193.168.1.2 is not.
Additional Public Attributes:
General strictness switches
strict_domain: don't allow sites to set two-component domains with
country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
This is far from perfect and isn't guaranteed to work!
RFC 2965 protocol strictness switches
strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
transactions (usually, an unverifiable transaction is one resulting from
a redirect or an image hosted on another site); if this is false, cookies
are NEVER blocked on the basis of verifiability
Netscape protocol strictness switches
strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
even to Netscape cookies
strict_ns_domain: flags indicating how strict to be with domain-matching
rules for Netscape cookies:
DomainStrictNoDots: when setting cookies, host prefix must not contain a
dot (e.g. www.foo.bar.com can't set a cookie for .bar.com, because
www.foo contains a dot)
DomainStrictNonDomain: cookies that did not explicitly specify a Domain
cookie-attribute can only be returned to a domain that string-compares
equal to the domain that set the cookie (e.g. rockets.acme.com won't
be returned cookies from acme.com that had no Domain cookie-attribute)
DomainRFC2965Match: when setting cookies, require a full RFC 2965
domain-match
DomainLiberal and DomainStrict are the most useful combinations of the
above flags, for convenience
strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
have names starting with '$'
strict_ns_set_path: don't allow setting cookies whose path doesn't
path-match request URI
"""
DomainStrictNoDots = 1
DomainStrictNonDomain = 2
DomainRFC2965Match = 4
DomainLiberal = 0
DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
def __init__(self,
blocked_domains=None, allowed_domains=None,
netscape=True, rfc2965=False,
# WARNING: this argument will change or go away if is not
# accepted into the Python standard library in this form!
# default, ie. treat 2109 as netscape iff not rfc2965
rfc2109_as_netscape=None,
hide_cookie2=False,
strict_domain=False,
strict_rfc2965_unverifiable=True,
strict_ns_unverifiable=False,
strict_ns_domain=DomainLiberal,
strict_ns_set_initial_dollar=False,
strict_ns_set_path=False,
):
"""
Constructor arguments should be used as keyword arguments only.
blocked_domains: sequence of domain names that we never accept cookies
from, nor return cookies to
allowed_domains: if not None, this is a sequence of the only domains
for which we accept and return cookies
For other arguments, see CookiePolicy.__doc__ and
DefaultCookiePolicy.__doc__..
"""
self.netscape = netscape
self.rfc2965 = rfc2965
self.rfc2109_as_netscape = rfc2109_as_netscape
self.hide_cookie2 = hide_cookie2
self.strict_domain = strict_domain
self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
self.strict_ns_unverifiable = strict_ns_unverifiable
self.strict_ns_domain = strict_ns_domain
self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
self.strict_ns_set_path = strict_ns_set_path
if blocked_domains is not None:
self._blocked_domains = tuple(blocked_domains)
else:
self._blocked_domains = ()
if allowed_domains is not None:
allowed_domains = tuple(allowed_domains)
self._allowed_domains = allowed_domains
def blocked_domains(self):
"""Return the sequence of blocked domains (as a tuple)."""
return self._blocked_domains
def set_blocked_domains(self, blocked_domains):
"""Set the sequence of blocked domains."""
self._blocked_domains = tuple(blocked_domains)
def is_blocked(self, domain):
for blocked_domain in self._blocked_domains:
if user_domain_match(domain, blocked_domain):
return True
return False
def allowed_domains(self):
"""Return None, or the sequence of allowed domains (as a tuple)."""
return self._allowed_domains
def set_allowed_domains(self, allowed_domains):
"""Set the sequence of allowed domains, or None."""
if allowed_domains is not None:
allowed_domains = tuple(allowed_domains)
self._allowed_domains = allowed_domains
def is_not_allowed(self, domain):
if self._allowed_domains is None:
return False
for allowed_domain in self._allowed_domains:
if user_domain_match(domain, allowed_domain):
return False
return True
def set_ok(self, cookie, request):
"""
If you override set_ok, be sure to call this method. If it returns
false, so should your subclass (assuming your subclass wants to be more
strict about which cookies to accept).
"""
debug(" - checking cookie %s", cookie)
assert cookie.name is not None
for n in "version", "verifiability", "name", "path", "domain", "port":
fn_name = "set_ok_"+n
fn = getattr(self, fn_name)
if not fn(cookie, request):
return False
return True
def set_ok_version(self, cookie, request):
if cookie.version is None:
# Version is always set to 0 by parse_ns_headers if it's a Netscape
# cookie, so this must be an invalid RFC 2965 cookie.
debug(" Set-Cookie2 without version attribute (%s)", cookie)
return False
if cookie.version > 0 and not self.rfc2965:
debug(" RFC 2965 cookies are switched off")
return False
elif cookie.version == 0 and not self.netscape:
debug(" Netscape cookies are switched off")
return False
return True
def set_ok_verifiability(self, cookie, request):
if request_is_unverifiable(request) and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
debug(" third-party RFC 2965 cookie during "
"unverifiable transaction")
return False
elif cookie.version == 0 and self.strict_ns_unverifiable:
debug(" third-party Netscape cookie during "
"unverifiable transaction")
return False
return True
def set_ok_name(self, cookie, request):
# Try and stop servers setting V0 cookies designed to hack other
# servers that know both V0 and V1 protocols.
if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
cookie.name.startswith("$")):
debug(" illegal name (starts with '$'): '%s'", cookie.name)
return False
return True
def set_ok_path(self, cookie, request):
if cookie.path_specified:
req_path = request_path(request)
if ((cookie.version > 0 or
(cookie.version == 0 and self.strict_ns_set_path)) and
not req_path.startswith(cookie.path)):
debug(" path attribute %s is not a prefix of request "
"path %s", cookie.path, req_path)
return False
return True
def set_ok_countrycode_domain(self, cookie, request):
"""Return False if explicit cookie domain is not acceptable.
Called by set_ok_domain, for convenience of overriding by
subclasses.
"""
if cookie.domain_specified and self.strict_domain:
domain = cookie.domain
# since domain was specified, we know that:
assert domain.startswith(".")
if domain.count(".") == 2:
# domain like .foo.bar
i = domain.rfind(".")
tld = domain[i+1:]
sld = domain[1:i]
if (sld.lower() in [
"co", "ac",
"com", "edu", "org", "net", "gov", "mil", "int",
"aero", "biz", "cat", "coop", "info", "jobs", "mobi",
"museum", "name", "pro", "travel",
] and
len(tld) == 2):
# domain like .co.uk
return False
return True
def set_ok_domain(self, cookie, request):
if self.is_blocked(cookie.domain):
debug(" domain %s is in user block-list", cookie.domain)
return False
if self.is_not_allowed(cookie.domain):
debug(" domain %s is not in user allow-list", cookie.domain)
return False
if not self.set_ok_countrycode_domain(cookie, request):
debug(" country-code second level domain %s", cookie.domain)
return False
if cookie.domain_specified:
req_host, erhn = eff_request_host_lc(request)
domain = cookie.domain
if domain.startswith("."):
undotted_domain = domain[1:]
else:
undotted_domain = domain
embedded_dots = (undotted_domain.find(".") >= 0)
if not embedded_dots and domain != ".local":
debug(" non-local domain %s contains no embedded dot",
domain)
return False
if cookie.version == 0:
if (not erhn.endswith(domain) and
(not erhn.startswith(".") and
not ("."+erhn).endswith(domain))):
debug(" effective request-host %s (even with added "
"initial dot) does not end end with %s",
erhn, domain)
return False
if (cookie.version > 0 or
(self.strict_ns_domain & self.DomainRFC2965Match)):
if not domain_match(erhn, domain):
debug(" effective request-host %s does not domain-match "
"%s", erhn, domain)
return False
if (cookie.version > 0 or
(self.strict_ns_domain & self.DomainStrictNoDots)):
host_prefix = req_host[:-len(domain)]
if (host_prefix.find(".") >= 0 and
not IPV4_RE.search(req_host)):
debug(" host prefix %s for domain %s contains a dot",
host_prefix, domain)
return False
return True
def set_ok_port(self, cookie, request):
if cookie.port_specified:
req_port = request_port(request)
if req_port is None:
req_port = "80"
else:
req_port = str(req_port)
for p in cookie.port.split(","):
try:
int(p)
except ValueError:
debug(" bad port %s (not numeric)", p)
return False
if p == req_port:
break
else:
debug(" request port (%s) not found in %s",
req_port, cookie.port)
return False
return True
def return_ok(self, cookie, request):
"""
If you override return_ok, be sure to call this method. If it returns
false, so should your subclass (assuming your subclass wants to be more
strict about which cookies to return).
"""
# Path has already been checked by path_return_ok, and domain blocking
# done by domain_return_ok.
debug(" - checking cookie %s", cookie)
for n in ("version", "verifiability", "secure", "expires", "port",
"domain"):
fn_name = "return_ok_"+n
fn = getattr(self, fn_name)
if not fn(cookie, request):
return False
return True
def return_ok_version(self, cookie, request):
if cookie.version > 0 and not self.rfc2965:
debug(" RFC 2965 cookies are switched off")
return False
elif cookie.version == 0 and not self.netscape:
debug(" Netscape cookies are switched off")
return False
return True
def return_ok_verifiability(self, cookie, request):
if request_is_unverifiable(request) and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
debug(" third-party RFC 2965 cookie during unverifiable "
"transaction")
return False
elif cookie.version == 0 and self.strict_ns_unverifiable:
debug(" third-party Netscape cookie during unverifiable "
"transaction")
return False
return True
def return_ok_secure(self, cookie, request):
if cookie.secure and request.get_type() != "https":
debug(" secure cookie with non-secure request")
return False
return True
def return_ok_expires(self, cookie, request):
if cookie.is_expired(self._now):
debug(" cookie expired")
return False
return True
def return_ok_port(self, cookie, request):
if cookie.port:
req_port = request_port(request)
if req_port is None:
req_port = "80"
for p in cookie.port.split(","):
if p == req_port:
break
else:
debug(" request port %s does not match cookie port %s",
req_port, cookie.port)
return False
return True
def return_ok_domain(self, cookie, request):
    """Refuse cookies whose domain does not match the request host.

    Version > 0 cookies must domain-match the effective request-host name.
    Version 0 (Netscape) cookies are matched by suffix, but only on a
    label boundary: the cookie domain is given a leading dot before the
    comparison, so that a dot-less domain such as "example.com" cannot
    match the unrelated host "badexample.com".
    """
    req_host, erhn = eff_request_host_lc(request)
    domain = cookie.domain
    # Normalise to a leading dot so the suffix test below can only succeed
    # at a "." boundary.
    if domain and not domain.startswith("."):
        dotted_domain = "." + domain
    else:
        dotted_domain = domain
    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if (cookie.version == 0 and
        (self.strict_ns_domain & self.DomainStrictNonDomain) and
        not cookie.domain_specified and domain != erhn):
        debug(" cookie with unspecified domain does not string-compare "
              "equal to request domain")
        return False
    if cookie.version > 0 and not domain_match(erhn, domain):
        debug(" effective request-host name %s does not domain-match "
              "RFC 2965 cookie domain %s", erhn, domain)
        return False
    if cookie.version == 0 and not ("." + erhn).endswith(dotted_domain):
        debug(" request-host %s does not match Netscape cookie domain "
              "%s", req_host, domain)
        return False
    return True
def domain_return_ok(self, domain, request):
    """Liberal first-pass check of a cookie domain against the request.

    This is here as an optimization to avoid having to load lots of MSIE
    cookie files unless necessary.  The request host names are munged to
    always start with a dot, so as to err on the side of letting cookies
    through.  The cookie domain is dotted in the same way, so that the
    suffix comparison can only match on a label boundary ("example.com"
    must not match "badexample.com").

    Domains on the user block-list, or absent from the user allow-list,
    are refused.
    """
    dotted_req_host, dotted_erhn = eff_request_host_lc(request)
    if not dotted_req_host.startswith("."):
        dotted_req_host = "." + dotted_req_host
    if not dotted_erhn.startswith("."):
        dotted_erhn = "." + dotted_erhn
    if domain and not domain.startswith("."):
        dotted_domain = "." + domain
    else:
        dotted_domain = domain
    if not (dotted_req_host.endswith(dotted_domain) or
            dotted_erhn.endswith(dotted_domain)):
        #debug(" request domain %s does not match cookie domain %s",
        #      req_host, domain)
        return False
    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False
    return True
def path_return_ok(self, path, request):
    """Return whether a cookie stored under `path` may go with `request`.

    The request path matches when it is identical to the cookie path, or
    when the cookie path is a prefix that ends on a "/" segment boundary
    (the cookie path itself ends in "/", or the next character of the
    request path is "/").  A bare prefix test is not enough: it would let
    a cookie for "/foo" be sent to "/foobar".
    """
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True
    if req_path.startswith(path):
        next_char = req_path[len(path):len(path) + 1]
        if path.endswith("/") or next_char == "/":
            return True
    debug(" %s does not path-match %s", req_path, path)
    return False
def vals_sorted_by_key(adict):
    """Return the values of `adict` as a list, ordered by their keys."""
    return [adict.get(key) for key in sorted(adict.keys())]
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key."""

    def __init__(self, mapping):
        # LIFO stack of (values, index of next value, parent item) frames
        self._s = [(vals_sorted_by_key(mapping), 0, None)]

    def __iter__(self):
        return self

    def next(self):
        """Return the next non-mapping value, or raise StopIteration."""
        # Explicit stack walk, written without generators.
        stack = self._s
        while True:
            if not stack:
                raise StopIteration()
            vals, i, prev_item = stack.pop()
            if i >= len(vals):
                # this level is exhausted; resume the parent frame
                continue
            item = vals[i]
            stack.append((vals, i + 1, prev_item))
            try:
                item.items
            except AttributeError:
                # non-mapping: this is a leaf value
                return item
            # mapping: descend into it before continuing with siblings
            stack.append((vals_sorted_by_key(item), 0, item))
# Sentinel passed as the default (second) argument of dict.get, so that a
# missing key can be told apart from a key stored with a None value.
class Absent:
    pass
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try mechanize.urlopen().

    The major methods are extract_cookies and add_cookie_header; these are
    all you are likely to need.

    CookieJar supports the iterator protocol:

    for cookie in cookiejar:
        # do something with cookie

    Methods:

    add_cookie_header(request)
    extract_cookies(response, request)
    get_policy()
    set_policy(policy)
    cookies_for_request(request)
    make_cookies(response, request)
    set_cookie_if_ok(cookie, request)
    set_cookie(cookie)
    clear_session_cookies()
    clear_expired_cookies()
    clear(domain=None, path=None, name=None)

    Public attributes

    policy: CookiePolicy object

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    def __init__(self, policy=None):
        """
        See CookieJar.__doc__ for argument documentation.

        """
        # Nested mapping: domain -> path -> name -> cookie.
        self._cookies = {}
        # for __getitem__ iteration in pre-2.2 Pythons
        self._prev_getitem_index = 0
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

    def get_policy(self):
        """Return the cookie policy object in use."""
        return self._policy

    def set_policy(self, policy):
        """Replace the cookie policy object."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under `domain` that the policy allows
        to be returned with `request`."""
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        matches = []
        for path, cookies_by_name in self._cookies[domain].items():
            if not self._policy.path_return_ok(path, request):
                continue
            for cookie in cookies_by_name.values():
                if self._policy.return_ok(cookie, request):
                    debug(" it's a match")
                    matches.append(cookie)
                else:
                    debug(" not returning cookie")
        return matches

    def cookies_for_request(self, request):
        """Return a list of cookies to be returned to server.

        The returned list of cookie instances is sorted in the order they
        should appear in the Cookie: header for return to the server.

        See add_cookie_header.__doc__ for the interface required of the
        request argument.

        New in version 0.1.10

        """
        now = int(time.time())
        self._policy._now = now
        self._now = now
        cookies = self._cookies_for_request(request)
        # Most specific (i.e. longest) path first.  The sort is stable, so
        # cookies whose paths have equal length keep their relative order.
        cookies.sort(key=lambda cookie: -len(cookie.path))
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        # this method still exists (alongside cookies_for_request) because it
        # is part of an implied protected interface for subclasses of cookiejar
        # XXX document that implied interface, or provide another way of
        # implementing cookiejars than subclassing
        found = []
        for domain in self._cookies.keys():
            found.extend(self._cookies_for_domain(domain, request))
        return found

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        The $Version attribute is also added when appropriate (currently only
        once per request).

        >>> jar = CookieJar()
        >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
        ...                    "example.com", False, False,
        ...                    "/", False, False, None, True,
        ...                    None, None, {})
        >>> jar._cookie_attrs([ns_cookie])
        ['foo="bar"']
        >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
        ...                         ".example.com", True, False,
        ...                         "/", False, False, None, True,
        ...                         None, None, {})
        >>> jar._cookie_attrs([rfc2965_cookie])
        ['$Version=1', 'foo=bar', '$Domain="example.com"']

        """
        attrs = []
        version_set = False
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            # different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            # RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            raw_value = cookie.value
            if raw_value is None:
                attrs.append(cookie.name)
            else:
                # quote cookie value if necessary
                # (not for Netscape protocol, which already has any quotes
                # intact, due to the poorly-specified Netscape Cookie: syntax)
                if self.non_word_re.search(raw_value) and version > 0:
                    value = self.quote_re.sub(r"\\\1", raw_value)
                else:
                    value = raw_value
                attrs.append("%s=%s" % (cookie.name, value))

            if not version > 0:
                continue
            # RFC 2965 cookies also echo back their path, domain and port
            if cookie.path_specified:
                attrs.append('$Path="%s"' % cookie.path)
            if cookie.domain.startswith("."):
                domain = cookie.domain
                if not cookie.domain_initial_dot:
                    # the dot was added by us, not sent by the server
                    domain = domain[1:]
                attrs.append('$Domain="%s"' % domain)
            if cookie.port is not None:
                port_attr = "$Port"
                if cookie.port_specified:
                    port_attr = port_attr + ('="%s"' % cookie.port)
                attrs.append(port_attr)
        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (mechanize.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        The request object (usually a mechanize.Request instance) must support
        the methods get_full_url, get_host, is_unverifiable, get_type,
        has_header, get_header, header_items and add_unredirected_header, as
        documented by urllib2.

        """
        debug("add_cookie_header")
        cookies = self.cookies_for_request(request)
        attrs = self._cookie_attrs(cookies)
        if attrs and not request.has_header("Cookie"):
            request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        policy = self._policy
        if policy.rfc2965 and not policy.hide_cookie2:
            for cookie in cookies:
                if cookie.version == 1:
                    continue
                if request.has_header("Cookie2"):
                    continue
                request.add_unredirected_header("Cookie2", '$Version="1"')
                break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")
        optional_value_attrs = ["port", "comment", "commenturl"]

        cookie_tuples = []
        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            standard = {}
            rest = {}
            max_age_set = False
            bad_cookie = False

            for k, v in cookie_attrs[1:]:
                lowered = k.lower()
                # don't lose case distinction for unknown fields
                if lowered in value_attrs or lowered in boolean_attrs:
                    k = lowered
                if v is None and k in boolean_attrs:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                elif k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug(" missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                elif k == "max-age":
                    max_age_set = True
                    if v is None:
                        debug(" missing value for max-age attribute")
                        bad_cookie = True
                        break
                    try:
                        v = int(v)
                    except ValueError:
                        debug(" missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v

                if not ((k in value_attrs) or (k in boolean_attrs)):
                    rest[k] = v
                    continue
                if v is None and k not in optional_value_attrs:
                    debug(" missing value for %s attribute" % k)
                    bad_cookie = True
                    break
                standard[k] = v

            if not bad_cookie:
                cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults
        from `request`; return None when the version attribute is invalid."""
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        path_specified = path is not Absent and path != ""
        if path_specified:
            path = escape_path(path)
        else:
            path = request_path(request)
            last_slash = path.rfind("/")
            if last_slash != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:last_slash]
                else:
                    path = path[:last_slash + 1]
            if not path:
                path = "/"

        # set default domain, remembering first whether the domain that was
        # given started with a dot
        domain_specified = domain is not Absent
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
            if not domain_initial_dot:
                domain = "." + domain
        else:
            domain_initial_dot = False
            req_host, erhn = eff_request_host_lc(request)
            domain = erhn

        # set default port
        port_specified = False
        if port is Absent:
            # No port attr present.  Cookie can be sent back on any port.
            port = None
        elif port is None:
            # Port attr present, but has no value: default to request port.
            # Cookie should then only be sent back on that port.
            port = request_port(request)
        else:
            port_specified = True
            port = re.sub(r"\s+", "", port)

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Turn parsed header attributes into a list of Cookie objects,
        dropping entries for which no cookie could be built."""
        cookies = []
        for tup in self._normalized_cookie_tuples(attrs_set):
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie:
                cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version 1 cookies as RFC 2109 and, if the policy asks for
        it, downgrade them to Netscape (version 0) cookies."""
        as_netscape = self._policy.rfc2109_as_netscape
        if as_netscape is None:
            # no explicit preference: follow the RFC 2965 switch
            as_netscape = not self._policy.rfc2965
        for cookie in cookies:
            if cookie.version != 1:
                continue
            cookie.rfc2109 = True
            if as_netscape:
                # treat 2109 cookies as Netscape cookies rather than
                # as RFC2965 cookies
                cookie.version = 0

    def _make_cookies(self, response, request):
        """Parse the Set-Cookie2 and Set-Cookie headers of `response` into
        Cookie objects, without applying expiry or set_ok checks."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        # A header family is usable only when it is present and its
        # protocol is switched on.
        usable_rfc2965 = rfc2965_hdrs and rfc2965
        usable_netscape = ns_hdrs and netscape
        if not (usable_rfc2965 or usable_netscape):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            cookies = []

        if usable_netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except:
                reraise_unmasked_exceptions()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                seen = {}
                for cookie in cookies:
                    seen[(cookie.domain, cookie.path, cookie.name)] = None
                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path,
                        ns_cookie.name) not in seen]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object.

        See extract_cookies.__doc__ for the interface required of the
        response and request arguments.

        """
        now = int(time.time())
        self._policy._now = now
        self._now = now
        live = []
        for cookie in self._make_cookies(response, request):
            if cookie.expires is None or not cookie.expires <= self._now:
                live.append(cookie)
        return live

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so.

        cookie: mechanize.Cookie instance
        request: see extract_cookies.__doc__ for the required interface

        """
        now = int(time.time())
        self._policy._now = now
        self._now = now
        if self._policy.set_ok(cookie, request):
            self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set.

        cookie: mechanize.Cookie instance
        """
        by_path = self._cookies.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request.

        Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
        object passed as argument.  Any of these headers that are found are
        used to update the state of the object (subject to the policy.set_ok
        method's approval).

        The response object (usually the result of a call to
        mechanize.urlopen, or similar) should support an info method, which
        returns a mimetools.Message object (in fact, the 'mimetools.Message
        object' may be any object that provides a getheaders method).

        The request object (usually a mechanize.Request instance) must support
        the methods get_full_url, get_type, get_host, and is_unverifiable, as
        documented by mechanize, and the port attribute (the port number).  The
        request is used to set default values for cookie-attributes as well as
        for checking that the cookie is OK to be set.

        """
        debug("extract_cookies: %s", response.info())
        now = int(time.time())
        self._policy._now = now
        self._now = now

        for cookie in self._make_cookies(response, request):
            already_expired = (cookie.expires is not None and
                               cookie.expires <= self._now)
            if already_expired:
                # Expiry date in past is request to delete cookie.  This can't
                # be in DefaultCookiePolicy, because can't delete cookies
                # there.
                try:
                    self.clear(cookie.domain, cookie.path, cookie.name)
                except KeyError:
                    pass
                debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                      cookie.domain, cookie.path, cookie.name)
            elif self._policy.set_ok(cookie, request):
                debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
            return
        if path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
            return
        if domain is not None:
            del self._cookies[domain]
            return
        self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Discards all cookies held by object which had either no Max-Age or
        Expires cookie-attribute or an explicit Discard cookie-attribute, or
        which otherwise have ended up with a true discard attribute.  For
        interactive browsers, the end of a session usually corresponds to
        closing the browser window.

        Note that the save method won't save session cookies anyway, unless you
        ask otherwise by passing a true ignore_discard argument.

        """
        for cookie in self:
            if not cookie.discard:
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the save
        method won't save expired cookies anyway (unless you ask otherwise by
        passing a true ignore_expires argument).

        """
        now = time.time()
        for cookie in self:
            if not cookie.is_expired(now):
                continue
            self.clear(cookie.domain, cookie.path, cookie.name)

    def __getitem__(self, i):
        """Sequential-only indexing: index 0 restarts the iteration, and
        every later index must follow the previous one directly."""
        if i == 0:
            self._getitem_iterator = self.__iter__()
        elif self._prev_getitem_index != i - 1:
            raise IndexError(
                "CookieJar.__getitem__ only supports sequential iteration")
        self._prev_getitem_index = i
        try:
            return self._getitem_iterator.next()
        except StopIteration:
            raise IndexError()

    def __iter__(self):
        return MappingIterator(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        count = 0
        for _cookie in self:
            count = count + 1
        return count

    def __repr__(self):
        parts = [repr(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(parts))

    def __str__(self):
        parts = [str(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(parts))
class LoadError(Exception):
    """Error raised for a cookie file that is not in the expected format."""
    pass
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        """
        CookieJar.__init__(self, policy)
        filename_is_bad = (filename is not None and
                           not isstringlike(filename))
        if filename_is_bad:
            raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        cookie_file = open(filename)
        try:
            self._really_load(cookie_file, filename,
                              ignore_discard, ignore_expires)
        finally:
            cookie_file.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        # keep a private copy so a failed load can be rolled back
        saved_state = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = saved_state
            raise
================================================
FILE: BruteXSS/mechanize/_debug.py
================================================
import logging
from _response import response_seek_wrapper
from _urllib2_fork import BaseHandler
class HTTPResponseDebugProcessor(BaseHandler):
    """Handler that logs every response body at INFO level on the
    "mechanize.http_responses" logger."""

    handler_order = 900  # before redirections, after everything else

    def http_response(self, request, response):
        """Log the body of `response`, rewind it, and pass it on."""
        # The body has to be read for logging, so a response that cannot
        # seek is wrapped in one that can.
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        log = logging.getLogger("mechanize.http_responses")
        try:
            log.info(response.read())
        finally:
            # always rewind, so later handlers see the full body
            response.seek(0)
        log.info("*****************************************************")
        return response

    https_response = http_response
class HTTPRedirectDebugProcessor(BaseHandler):
    """Handler that logs the target URL of requests carrying a
    redirect_dict attribute, on the "mechanize.http_redirects" logger."""

    def http_request(self, request):
        """Log the request's URL if it has a redirect_dict; return it
        unchanged."""
        if not hasattr(request, "redirect_dict"):
            return request
        log = logging.getLogger("mechanize.http_redirects")
        log.info("redirecting to %s", request.get_full_url())
        return request
================================================
FILE: BruteXSS/mechanize/_firefox3cookiejar.py
================================================
"""Firefox 3 "cookies.sqlite" cookie persistence.
Copyright 2008 John J Lee
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import logging
import time
from _clientcookie import CookieJar, Cookie, MappingIterator
from _util import isstringlike, experimental
debug = logging.getLogger("mechanize.cookies").debug
class Firefox3CookieJar(CookieJar):
    """Firefox 3 cookie jar.

    The cookies are stored in Firefox 3's "cookies.sqlite" format.

    Constructor arguments:

    filename: filename of cookies.sqlite (typically found at the top level
     of a firefox profile directory)
    autoconnect: as a convenience, connect to the SQLite cookies database at
     Firefox3CookieJar construction time (default True)
    policy: an object satisfying the mechanize.CookiePolicy interface

    Note that this is NOT a FileCookieJar, and there are no .load(),
    .save() or .revert() methods.  The database is in sync with the
    cookiejar object's state after each public method call.

    Following Firefox's own behaviour, session cookies are never saved to
    the database.

    The file is created, and an sqlite database written to it, if it does
    not already exist.  The moz_cookies database table is created if it does
    not already exist.
    """

    # XXX
    # handle DatabaseError exceptions
    # add a FileCookieJar (explicit .save() / .revert() / .load() methods)

    def __init__(self, filename, autoconnect=True, policy=None):
        experimental("Firefox3CookieJar is experimental code")
        CookieJar.__init__(self, policy)
        if filename is not None and not isstringlike(filename):
            raise ValueError("filename must be string-like")
        self.filename = filename
        self._conn = None
        if autoconnect:
            self.connect()

    def connect(self):
        """Open the SQLite database and make sure moz_cookies exists."""
        import sqlite3  # not available in Python 2.4 stdlib
        self._conn = sqlite3.connect(self.filename)
        self._conn.isolation_level = "DEFERRED"
        self._create_table_if_necessary()

    def close(self):
        """Close the database connection, if one was ever opened."""
        # With autoconnect=False and no connect() call there is nothing to
        # close; do not fail with AttributeError on None in that case.
        if self._conn is not None:
            self._conn.close()

    def _transaction(self, func):
        """Run func(cursor) and commit; roll back and re-raise on any
        exception.  Returns whatever func returned."""
        try:
            cur = self._conn.cursor()
            try:
                result = func(cur)
            finally:
                cur.close()
        except:
            self._conn.rollback()
            raise
        else:
            self._conn.commit()
        return result

    def _execute(self, query, params=()):
        return self._transaction(lambda cur: cur.execute(query, params))

    def _query(self, query, params=()):
        """Execute a query and return all of its rows."""
        # XXX should we bother with a transaction?
        cur = self._conn.cursor()
        try:
            cur.execute(query, params)
            return cur.fetchall()
        finally:
            cur.close()

    def _create_table_if_necessary(self):
        self._execute("""\
CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT,
value TEXT, host TEXT, path TEXT,expiry INTEGER,
lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""")

    def _cookie_from_row(self, row):
        """Build a version 0 Cookie from one moz_cookies row."""
        (pk, name, value, domain, path, expires,
         last_accessed, secure, http_only) = row

        version = 0
        domain = domain.encode("ascii", "ignore")
        path = path.encode("ascii", "ignore")
        name = name.encode("ascii", "ignore")
        value = value.encode("ascii", "ignore")
        secure = bool(secure)

        # last_accessed isn't a cookie attribute, so isn't added to rest
        rest = {}
        if http_only:
            rest["HttpOnly"] = None

        if name == "":
            # a valueless cookie is stored with its text in the value
            # column and an empty name (see _row_from_cookie)
            name = value
            value = None

        initial_dot = domain.startswith(".")
        domain_specified = initial_dot

        discard = False
        if expires == "":
            expires = None
            discard = True

        return Cookie(version, name, value,
                      None, False,
                      domain, domain_specified, initial_dot,
                      path, False,
                      secure,
                      expires,
                      discard,
                      None,
                      None,
                      rest)

    def clear(self, domain=None, path=None, name=None):
        """Clear cookies from both the in-memory jar and the database.

        Arguments are interpreted as for CookieJar.clear().  ValueError from
        invalid argument combinations propagates unchanged.  KeyError is
        raised only when nothing matched in memory and no database row was
        deleted.
        """
        # Only session cookies live in the in-memory mapping; a persistent
        # cookie exists solely as a database row.  A KeyError from the
        # in-memory jar therefore must not stop the database delete.
        missing_in_memory = False
        try:
            CookieJar.clear(self, domain, path, name)
        except KeyError:
            missing_in_memory = True

        where_parts = []
        sql_params = []
        if domain is not None:
            where_parts.append("host = ?")
            sql_params.append(domain)
            if path is not None:
                where_parts.append("path = ?")
                sql_params.append(path)
                if name is not None:
                    where_parts.append("name = ?")
                    sql_params.append(name)
        where = " AND ".join(where_parts)
        if where:
            where = " WHERE " + where

        def delete_rows(cur):
            cur.execute("DELETE FROM moz_cookies%s" % where,
                        tuple(sql_params))
            return cur.rowcount

        deleted = self._transaction(delete_rows)
        if missing_in_memory and deleted == 0:
            raise KeyError((domain, path, name))

    def _row_from_cookie(self, cookie, cur):
        """Return the moz_cookies row tuple representing `cookie`.

        `cur` is used to pick the next free primary key.
        """
        expires = cookie.expires
        if cookie.discard:
            expires = ""

        domain = unicode(cookie.domain)
        path = unicode(cookie.path)
        # The None test must look at the cookie's own value: unicode(None)
        # is the text u"None", never None.
        if cookie.value is None:
            # valueless cookie: text goes in the value column, name is empty
            value = unicode(cookie.name)
            name = ""
        else:
            name = unicode(cookie.name)
            value = unicode(cookie.value)
        secure = bool(int(cookie.secure))

        last_accessed = int(time.time())
        http_only = cookie.has_nonstandard_attr("HttpOnly")

        query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""")
        pk = query.fetchone()[0]
        if pk is None:
            pk = 1

        return (pk, name, value, domain, path, expires,
                last_accessed, secure, http_only)

    def set_cookie(self, cookie):
        """Store `cookie`: session cookies in memory only, all others in
        the database (replacing any row with the same host/path/name)."""
        if cookie.discard:
            CookieJar.set_cookie(self, cookie)
            return

        def set_cookie(cur):
            # XXX
            # is this RFC 2965-correct?
            # could this do an UPDATE instead?
            row = self._row_from_cookie(cookie, cur)
            name, unused, domain, path = row[1:5]
            cur.execute("""\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
                        (domain, path, name))
            cur.execute("""\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)
        self._transaction(set_cookie)

    def __iter__(self):
        # session (non-persistent) cookies
        for cookie in MappingIterator(self._cookies):
            yield cookie
        # persistent cookies
        for row in self._query("""\
SELECT * FROM moz_cookies ORDER BY name, path, host"""):
            yield self._cookie_from_row(row)

    def _cookies_for_request(self, request):
        """Return session plus persistent cookies eligible for `request`."""
        session_cookies = CookieJar._cookies_for_request(self, request)

        def get_cookies(cur):
            # DISTINCT: each host must be examined once only, otherwise
            # every cookie would be returned once per row sharing its host.
            query = cur.execute("SELECT DISTINCT host from moz_cookies")
            domains = [row[0] for row in query.fetchall()]
            cookies = []
            for domain in domains:
                cookies += self._persistent_cookies_for_domain(domain,
                                                               request, cur)
            return cookies
        persistent_cookies = self._transaction(get_cookies)
        return session_cookies + persistent_cookies

    def _persistent_cookies_for_domain(self, domain, request, cur):
        """Return the database cookies for `domain` that the policy allows
        to be returned with `request`."""
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        query = cur.execute("""\
SELECT * from moz_cookies WHERE host = ? ORDER BY path""",
                            (domain,))
        cookies = [self._cookie_from_row(row) for row in query.fetchall()]
        # Ask the policy about each distinct path once and remember the
        # verdict, so that every cookie sharing a rejected path is skipped
        # (return_ok itself does not check the path).
        path_verdicts = {}
        r = []
        for cookie in cookies:
            if cookie.path not in path_verdicts:
                path_verdicts[cookie.path] = self._policy.path_return_ok(
                    cookie.path, request)
            if not path_verdicts[cookie.path]:
                continue
            if not self._policy.return_ok(cookie, request):
                debug(" not returning cookie")
                continue
            debug(" it's a match")
            r.append(cookie)
        return r
================================================
FILE: BruteXSS/mechanize/_form.py
================================================
"""HTML form handling for web clients.
HTML form handling for web clients: useful for parsing HTML forms, filling them
in and returning the completed forms to the server. This code developed from a
port of Gisle Aas' Perl module HTML::Form, from the libwww-perl library, but
the interface is not the same.
The most useful docstring is the one for HTMLForm.
RFC 1866: HTML 2.0
RFC 1867: Form-based File Upload in HTML
RFC 2388: Returning Values from Forms: multipart/form-data
HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
HTML 4.01 Specification, W3C Recommendation 24 December 1999
Copyright 2002-2007 John J. Lee
Copyright 2005 Gary Poster
Copyright 2005 Zope Corporation
Copyright 1998-2000 Gisle Aas.
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
# TODO:
# Clean up post the merge into mechanize
# * Remove code that was duplicated in ClientForm and mechanize
# * Remove weird import stuff
# * Remove pre-Python 2.4 compatibility cruft
# * Clean up tests
# * Later release: Remove the ClientForm 0.1 backwards-compatibility switch
# Remove parser testing hack
# Clean action URI
# Switch to unicode throughout
# See Wichert Akkerman's 2004-01-22 message to c.l.py.
# Apply recommendations from google code project CURLIES
# Apply recommendations from HTML 5 spec
# Add charset parameter to Content-type headers? How to find value??
# Functional tests to add:
# Single and multiple file upload
# File upload with missing name (check standards)
# mailto: submission & enctype text/plain??
# Replace by_label etc. with moniker / selector concept. Allows, e.g., a
# choice between selection by value / id / label / element contents. Or
# choice between matching labels exactly or by substring. etc.
__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
'RadioControl', 'ScalarControl', 'SelectControl',
'SubmitButtonControl', 'SubmitControl', 'TextControl',
'TextareaControl', 'XHTMLCompatibleFormParser']
import HTMLParser
from cStringIO import StringIO
import inspect
import logging
import random
import re
import sys
import urllib
import urlparse
import warnings
import _beautifulsoup
import _request
# from Python itself, for backwards compatibility of raised exceptions
import sgmllib
# bundled copy of sgmllib
import _sgmllib_copy
# NOTE(review): VERSION is not referenced in the part of the file reviewed
# here; presumably the version of the form-handling code -- confirm.
VERSION = "0.2.11"
CHUNK = 1024 # size of chunks fed to parser, in bytes
# Default value of the "encoding" argument of the parser classes and of the
# Parse* functions below.
DEFAULT_ENCODING = "latin-1"
# Logger used by debug().
_logger = logging.getLogger("mechanize.forms")
# While true, debug() returns immediately without logging anything;
# _show_debug_messages() sets it to False.
OPTIMIZATION_HACK = True
def debug(msg, *args, **kwds):
    """Log msg at DEBUG level, prefixed with the calling function's name.

    Does nothing while the module-level OPTIMIZATION_HACK flag is true, so
    the inspect.stack() lookup is skipped entirely in that case.  The extra
    positional and keyword arguments are handed on to _logger.debug().
    """
    if not OPTIMIZATION_HACK:
        # Frame record 1 belongs to the function that called debug(); item 3
        # of a frame record is that function's name.
        caller_record = inspect.stack()[1]
        _logger.debug('%%s %s' % msg, caller_record[3], *args, **kwds)
def _show_debug_messages():
    """Enable debug() output and echo it to standard output.

    Clears the OPTIMIZATION_HACK flag, lowers the logger threshold to DEBUG
    and attaches a DEBUG-level stream handler writing to sys.stdout.  Each
    call attaches one more handler.
    """
    global OPTIMIZATION_HACK
    OPTIMIZATION_HACK = False
    _logger.setLevel(logging.DEBUG)
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)
    _logger.addHandler(stdout_handler)
def deprecation(message, stack_offset=0):
    """Issue a DeprecationWarning carrying message.

    The warning is reported with a stacklevel of 3 plus stack_offset, i.e.
    it is attributed to a frame further up the call stack than the function
    that calls deprecation().
    """
    reported_level = stack_offset + 3
    warnings.warn(message, DeprecationWarning, stacklevel=reported_level)
class Missing:
    """Empty marker class.

    NOTE(review): nothing in the part of the file reviewed here uses it;
    presumably a "no value supplied" sentinel -- confirm against callers.
    """
# Matches any run of whitespace characters.
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Return text stripped at both ends, with each inner whitespace run
    replaced by a single space."""
    trimmed = text.strip()
    return _compress_re.sub(" ", trimmed)
def normalize_line_endings(text):
return re.sub(r"(?:(?
w = MimeWriter(f)
...call w.addheader(key, value) 0 or more times...
followed by either:
f = w.startbody(content_type)
...call f.write(data) for body data...
or:
w.startmultipartbody(subtype)
for each part:
subwriter = w.nextpart()
...use the subwriter's methods to create the subpart...
w.lastpart()
The subwriter is another MimeWriter instance, and should be
treated in the same way as the toplevel MimeWriter. This way,
writing recursive body parts is easy.
Warning: don't forget to call lastpart()!
XXX There should be more state so calls made in the wrong order
are detected.
Some special cases:
- startbody() just returns the file passed to the constructor;
but don't use this knowledge, as it may be changed.
- startmultipartbody() actually returns a file as well;
this can be used to write the initial 'if you can read this your
mailer is not MIME-aware' message.
- If you call flushheaders(), the headers accumulated so far are
written out (and forgotten); this is useful if you don't need a
body part at all, e.g. for a subpart of type message/rfc822
that's (mis)used to store some header-like information.
- Passing a keyword argument 'prefix=' to addheader(),
start*body() affects where the header is inserted; 0 means
append at the end, 1 means insert at the start; default is
append for addheader(), but insert for start*body(), which use
it to determine where the Content-type header goes.
"""
def __init__(self, fp, http_hdrs=None):
    """Set up a writer that emits to fp.

    fp: object the headers, boundaries and separators are written to.
    http_hdrs: collection that addheader() appends (name, value) pairs to
        when it is called with a true add_to_http_hdrs.
    """
    self._fp = fp
    self._http_hdrs = http_hdrs
    # header lines accumulated by addheader() until flushheaders()
    self._headers = []
    # stack of boundary strings, one per open multipart body
    self._boundary = []
    # true until nextpart() has written the first boundary line
    self._first_part = True
def addheader(self, key, value, prefix=0,
              add_to_http_hdrs=0):
    """Record one header.

    value is split on CRLF and empty leading/trailing lines are dropped.
    With a true add_to_http_hdrs the lines are joined without a separator
    and (key.capitalize(), value) is appended to the HTTP header
    collection given to the constructor; prefix is ignored in that case.
    Otherwise every continuation line is stripped and re-indented, and the
    resulting "Key: value" text is queued for flushheaders(): at the front
    of the queue if prefix is true, at the end otherwise.
    """
    pieces = value.split("\r\n")
    while pieces and not pieces[-1]:
        pieces.pop()
    while pieces and not pieces[0]:
        pieces.pop(0)
    if add_to_http_hdrs:
        # 2.2 urllib2 doesn't normalize header case
        self._http_hdrs.append((key.capitalize(), "".join(pieces)))
        return
    folded = pieces[:1] + [" " + extra.strip() for extra in pieces[1:]]
    header_line = key.title() + ": " + "\r\n".join(folded) + "\r\n"
    if prefix:
        self._headers.insert(0, header_line)
    else:
        self._headers.append(header_line)
def flushheaders(self):
    """Write every queued header line to the output and empty the queue."""
    sink = self._fp
    sink.writelines(self._headers)
    # rebind rather than clear in place, exactly as before
    self._headers = []
def startbody(self, ctype=None, plist=[], prefix=1,
              add_to_http_hdrs=0, content_type=1):
    """Finish the header section and return the file to write the body to.

    When both content_type and ctype are true, a Content-Type header is
    added first: ctype followed by one ";<CRLF> name=value" parameter per
    (name, value) pair in plist.  prefix is ignored if add_to_http_hdrs is
    true.  The queued headers are then flushed and, unless add_to_http_hdrs
    is true, the blank line that ends the header section is written.

    plist is only read, never modified, so the shared default list is safe.
    """
    if content_type and ctype:
        parameters = "".join(
            [';\r\n %s=%s' % (name, value) for name, value in plist])
        self.addheader("Content-Type", ctype + parameters, prefix=prefix,
                       add_to_http_hdrs=add_to_http_hdrs)
    self.flushheaders()
    if not add_to_http_hdrs:
        self._fp.write("\r\n")
    self._first_part = True
    return self._fp
def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                       add_to_http_hdrs=0, content_type=1):
    """Open a "multipart/<subtype>" body and return startbody()'s result.

    A boundary is obtained from choose_boundary() when none (or an empty
    one) is given.  It is pushed on the boundary stack and passed to
    startbody() as the first Content-Type parameter, ahead of the pairs in
    plist.  plist is only read, never modified.
    """
    if not boundary:
        boundary = choose_boundary()
    self._boundary.append(boundary)
    parameters = [("boundary", boundary)] + plist
    return self.startbody("multipart/" + subtype,
                          parameters,
                          prefix=prefix,
                          add_to_http_hdrs=add_to_http_hdrs,
                          content_type=content_type)
def nextpart(self):
    """Write the boundary line that starts the next part.

    Every part except the first is preceded by a CRLF.  Returns a new
    instance of the same class writing to the same file, to be used for
    the headers and body of the part.
    """
    sink = self._fp
    delimiter = "--" + self._boundary[-1] + "\r\n"
    if self._first_part:
        self._first_part = False
    else:
        sink.write("\r\n")
    sink.write(delimiter)
    return self.__class__(sink)
def lastpart(self):
    """Close the innermost multipart body.

    If no part has been started yet, nextpart() is called once first.  The
    current boundary is then popped off the stack and written as the
    closing "--boundary--" line.
    """
    if self._first_part:
        self.nextpart()
    closing = self._boundary.pop()
    self._fp.write("\r\n--" + closing + "--\r\n")
class LocateError(ValueError):
    """ValueError subclass; common base of AmbiguityError,
    ControlNotFoundError and ItemNotFoundError."""
class AmbiguityError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when a lookup matches more than one
    candidate -- the raise sites are outside the reviewed part of the file.
    """
class ControlNotFoundError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when no control matches a lookup --
    the raise sites are outside the reviewed part of the file.
    """
class ItemNotFoundError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when no list item matches a lookup --
    the raise sites are outside the reviewed part of the file.
    """
class ItemCountError(ValueError):
    """ValueError subclass, unrelated to the LocateError family.

    NOTE(review): presumably signals a wrong number of items -- the raise
    sites are outside the reviewed part of the file.
    """
# for backwards compatibility, ParseError derives from exceptions that were
# raised by versions of ClientForm <= 0.2.5
# TODO: move to _html
class ParseError(sgmllib.SGMLParseError,
                 HTMLParser.HTMLParseError):
    """Error raised by the form parsers in this module.

    Inherits from both stdlib parser exceptions so that handlers written
    for either of them still catch it.
    """

    def __init__(self, *args, **kwds):
        # Initialise as a plain Exception instead of going through the
        # constructors of the two parser exception bases.
        Exception.__init__(self, *args, **kwds)

    def __str__(self):
        # Likewise, format as a plain Exception.
        return Exception.__str__(self)
class _AbstractFormParser:
    """Parser mix-in that collects form, control and label data.

    It is combined with a concrete HTML/SGML parser class, which calls the
    start_*/end_*/do_* and handle_* methods below and supplies
    unescape_attr_if_required().

    On completion:

    forms  -- list of (name, action, method, enctype), attrs, controls
              tuples, one per FORM element.  forms[0] is a synthetic
              "global" form holding the controls found outside any FORM.
              Each entry of controls is a (type, name, attrs) tuple.
    labels -- list of attribute dicts of the LABEL elements that have a
              non-empty "for" attribute; the text is stored under "__text".
    base   -- href of the BASE element, or None.
    """

    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        """entitydefs defaults to get_entitydefs(); encoding is passed to
        the unescape helpers."""
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def do_base(self, attrs):
        """Remember the href of a BASE element."""
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = self.unescape_attr_if_required(value)

    def end_body(self):
        """Close a label and/or form still open at the end of the document."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin a FORM; raises ParseError if one is already open."""
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        d = {}
        for key, value in attrs:
            if key == "name":
                name = self.unescape_attr_if_required(value)
            elif key == "action":
                action = self.unescape_attr_if_required(value)
            elif key == "method":
                method = self.unescape_attr_if_required(value.upper())
            elif key == "enctype":
                enctype = self.unescape_attr_if_required(value.lower())
            d[key] = self.unescape_attr_if_required(value)
        controls = []
        self._current_form = (name, action, method, enctype), d, controls

    def end_form(self):
        """Store the open FORM in self.forms; raises ParseError if none is
        open."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        """Begin a SELECT and record a control entry for the element itself."""
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._select = d
        self._add_label(d)

        self._append_select_control({"__select": d})

    def end_select(self):
        """End the open SELECT, first finishing an OPTION left open."""
        debug("")
        if self._select is None:
            raise ParseError("end of SELECT before start")

        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        """Begin an OPTGROUP; only allowed inside a SELECT."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._optgroup = d

    def end_optgroup(self):
        """End the open OPTGROUP."""
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        """Begin an OPTION, implicitly ending a previous one still open.

        An OPTION inside a disabled OPTGROUP is marked disabled too.
        """
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            self._end_option()

        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._option = {}
        self._option.update(d)
        if (self._optgroup and "disabled" in self._optgroup and
                "disabled" not in self._option):
            self._option["disabled"] = None

    def _end_option(self):
        """Finish the open OPTION and record it as a select control entry.

        The stripped element text becomes "contents" and is also used for
        "value" and "label" when those attributes are absent.
        """
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        contents = self._option.get("contents", "").strip()
        self._option["contents"] = contents
        if "value" not in self._option:
            self._option["value"] = contents
        if "label" not in self._option:
            self._option["label"] = contents
        # stuff dict of SELECT HTML attrs into a special private key
        # (gets deleted again later)
        self._option["__select"] = self._select
        self._append_select_control(self._option)
        self._option = None

    def _append_select_control(self, attrs):
        """Append a ("select", name, attrs) entry to the current form."""
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        """Begin a TEXTAREA; its text is collected by handle_data()."""
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        self._add_label(d)

        self._textarea = d

    def end_textarea(self):
        """End the open TEXTAREA and record it as a control entry."""
        debug("")
        if self._textarea is None:
            raise ParseError("end of TEXTAREA before start")
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        """Begin a LABEL, implicitly ending a previous one still open.

        Only labels with a non-empty "for" attribute are added to
        self.labels.
        """
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        taken = bool(d.get("for"))  # empty id is invalid
        d["__text"] = ""
        d["__taken"] = taken
        if taken:
            self.labels.append(d)
        self._current_label = d

    def end_label(self):
        """End the open LABEL; a stray end tag is silently ignored."""
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Attach the enclosing LABEL, if it is still untaken, to the
        control attribute dict d under "__label"."""
        #debug("%s", d)
        if self._current_label is not None:
            if not self._current_label["__taken"]:
                self._current_label["__taken"] = True
                d["__label"] = self._current_label

    def handle_data(self, data):
        """Append character data to the open OPTION, TEXTAREA or LABEL.

        OPTION takes precedence over TEXTAREA, which takes precedence over
        LABEL; data outside all three is dropped.
        """
        debug("%s", data)

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target = self._option
            key = "contents"
        elif self._textarea is not None:
            target = self._textarea
            key = "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target = self._current_label
            key = "__text"
        else:
            return

        if not data:
            # Nothing to add.  Without this guard an empty chunk arriving
            # before any text was stored fell into the append branch below
            # and raised KeyError for the not-yet-created key.
            return

        if key not in target:
            # according to
            # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
            # immediately after start tags or immediately before end tags must
            # be ignored, but real browsers only ignore a line break after a
            # start tag, so we'll do that.
            if data[0:2] == "\r\n":
                data = data[2:]
            elif data[0:1] in ["\n", "\r"]:
                data = data[1:]
            target[key] = data
        else:
            target[key] = target[key] + data

    def do_button(self, attrs):
        """Record a BUTTON element as a control entry."""
        debug("%s", attrs)
        d = {}
        d["type"] = "submit"  # default
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"] + "button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        """Record an INPUT element as a control entry (default type
        "text")."""
        debug("%s", attrs)
        d = {}
        d["type"] = "text"  # default
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        """Record an ISINDEX element as a control entry."""
        debug("%s", attrs)
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        """Resolve the entity reference &name; and treat it as data."""
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        """Resolve a numeric character reference and treat it as data."""
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        """Return name with entity/character references resolved."""
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of the dict attrs with every value unescaped;
        nested dicts are processed recursively."""
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            try:
                val.items
            except AttributeError:
                escaped_attrs[key] = self.unescape_attr(val)
            else:
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
        return escaped_attrs

    def unknown_entityref(self, ref):
        """Pass an unresolvable entity reference through as literal text."""
        self.handle_data("&%s;" % ref)

    def unknown_charref(self, ref):
        """Pass an unresolvable character reference through as literal text.

        ref is the reference without its "&#" prefix and ";" terminator, so
        both are restored here, mirroring unknown_entityref().
        """
        self.handle_data("&#%s;" % ref)
class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
    """Good for XHTML, bad for tolerance of incorrect HTML."""
    # thanks to Michael Howitz for this!

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        HTMLParser.HTMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to HTMLParser, re-raising its parse errors as
        ParseError."""
        try:
            HTMLParser.HTMLParser.feed(self, data)
        except HTMLParser.HTMLParseError:
            # sys.exc_info() instead of a bound name keeps this clause
            # valid on every Python version.
            raise ParseError(sys.exc_info()[1])

    def start_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    def end_option(self):
        _AbstractFormParser._end_option(self)

    def handle_starttag(self, tag, attrs):
        """Dispatch to start_<tag>, else do_<tag>; unknown tags are
        ignored."""
        handler = getattr(self, "start_" + tag, None)
        if handler is None:
            handler = getattr(self, "do_" + tag, None)
        if handler is not None:
            handler(attrs)

    def handle_endtag(self, tag):
        """Dispatch to end_<tag>; unknown tags are ignored."""
        handler = getattr(self, "end_" + tag, None)
        if handler is not None:
            handler()

    def unescape(self, name):
        # Use the entitydefs passed into constructor, not
        # HTMLParser.HTMLParser's entitydefs.
        return self.unescape_attr(name)

    def unescape_attr_if_required(self, name):
        # HTMLParser.HTMLParser already did it
        return name

    def unescape_attrs_if_required(self, attrs):
        # ditto
        return attrs

    def close(self):
        """Close the underlying parser, then any label/form still open."""
        HTMLParser.HTMLParser.close(self)
        self.end_body()
class _AbstractSgmllibParser(_AbstractFormParser):
    """Form-parser pieces shared by the sgmllib-style parser classes."""

    # we override this attr to decode hex charrefs
    entity_or_charref = re.compile(
        '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')

    def do_option(self, attrs):
        """Handle an OPTION start tag."""
        _AbstractFormParser._start_option(self, attrs)

    def convert_entityref(self, name):
        """Return the text for the entity reference &name;."""
        reference = "&%s;" % name
        return unescape(reference, self._entitydefs, self._encoding)

    def convert_charref(self, name):
        """Return the text for the numeric character reference name."""
        reference = "%s" % name
        return unescape_charref(reference, self._encoding)

    def unescape_attr_if_required(self, name):
        # sgmllib already did it
        return name

    def unescape_attrs_if_required(self, attrs):
        # ditto
        return attrs
class FormParser(_AbstractSgmllibParser, _sgmllib_copy.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML."""

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        _sgmllib_copy.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to SGMLParser, re-raising its parse errors as
        ParseError."""
        try:
            _sgmllib_copy.SGMLParser.feed(self, data)
        except _sgmllib_copy.SGMLParseError:
            # sys.exc_info() instead of a bound name keeps this clause
            # valid on every Python version.
            raise ParseError(sys.exc_info()[1])

    def close(self):
        """Close the underlying parser, then any label/form still open."""
        _sgmllib_copy.SGMLParser.close(self)
        self.end_body()
class _AbstractBSFormParser(_AbstractSgmllibParser):
    """Base for form parsers built on a parser class named by the
    bs_base_class attribute, which concrete subclasses must set."""

    bs_base_class = None

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        _AbstractFormParser.__init__(self, entitydefs, encoding)
        self.bs_base_class.__init__(self)

    def handle_data(self, data):
        """Give data to the form collector first, then to bs_base_class."""
        _AbstractFormParser.handle_data(self, data)
        self.bs_base_class.handle_data(self, data)

    def feed(self, data):
        """Feed data to bs_base_class, re-raising SGMLParseError as
        ParseError."""
        try:
            self.bs_base_class.feed(self, data)
        except _sgmllib_copy.SGMLParseError:
            # sys.exc_info() instead of a bound name keeps this clause
            # valid on every Python version.
            raise ParseError(sys.exc_info()[1])

    def close(self):
        """Close bs_base_class, then any label/form still open."""
        self.bs_base_class.close(self)
        self.end_body()
class RobustFormParser(_AbstractBSFormParser, _beautifulsoup.BeautifulSoup):
    """Tries to be highly tolerant of incorrect HTML.

    Form parser built on the bundled BeautifulSoup class.
    """

    bs_base_class = _beautifulsoup.BeautifulSoup
class NestingRobustFormParser(_AbstractBSFormParser,
                              _beautifulsoup.ICantBelieveItsBeautifulSoup):
    """Tries to be highly tolerant of incorrect HTML.

    Unlike RobustFormParser, this one is built on
    ICantBelieveItsBeautifulSoup, so it more often guesses nesting above
    missing end tags (see BeautifulSoup docs).
    """

    bs_base_class = _beautifulsoup.ICantBelieveItsBeautifulSoup
#FormParser = XHTMLCompatibleFormParser # testing hack
#FormParser = RobustFormParser # testing hack
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=_request.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Parse the forms in an HTTP response, including the "global" form.

    Differs from ParseResponse in three ways:

    1. The returned list has one extra item in front: a form holding all
       controls that are not inside any FORM element.
    2. There are no ignore_errors and backwards_compat arguments.
    3. Backwards-compatibility mode is always off.

    response.geturl() supplies the base URI of the document.
    """
    return _ParseFileEx(response, response.geturl(),
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=_request.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Parse the forms in a file-like object, including the "global" form.

    Differs from ParseFile in three ways:

    1. The returned list has one extra item in front: a form holding all
       controls that are not inside any FORM element.
    2. There are no ignore_errors and backwards_compat arguments.
    3. Backwards-compatibility mode is always off.
    """
    return _ParseFileEx(file, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
def ParseString(text, base_uri, *args, **kwds):
    """Parse the forms in the string text.

    The string is wrapped in a StringIO and handed to ParseFileEx together
    with base_uri and any further arguments; see ParseFileEx for the result.
    """
    return ParseFileEx(StringIO(text), base_uri, *args, **kwds)
def ParseResponse(response, *args, **kwds):
    """Parse an HTTP response and return a list of HTMLForm instances.

    The return value of mechanize.urlopen can be passed directly as the
    response argument.  mechanize.ParseError is raised on parse errors.

    response: file-like object (supporting read()) that also has a
      geturl() method returning the URI of the HTTP response

    The remaining positional and keyword arguments are, in order:

    select_default: for multiple-selection SELECT controls and RADIO
      controls, pick the first item as the default if none are selected in
      the HTML
    ignore_errors: accepted, but not used by the parsing code
    form_parser_class: class instantiated to do the parsing
    request_class: class to return from .click() method (default is
      mechanize.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
      definitions (a sensible default is used)
    backwards_compat: boolean, true by default, that determines whether the
      returned HTMLForm objects are backwards-compatible with old code.  A
      DeprecationWarning is issued whenever it is true.  In this mode:
       - ClientForm 0.1 code will continue to work as before.
       - Label searches that do not specify a nr (number or count) always
         get the first match, even if other controls match.  If
         backwards_compat is false, ambiguous label searches raise an
         AmbiguityError.
       - Item label matching is done by strict string comparison rather
         than substring matching.
       - De-selecting individual list items is allowed even if the Item is
         disabled.
      The backwards_compat argument will be removed in a future release.
    encoding: character encoding used for encoding numeric character
      references.  No attempt is made here to discover the encoding from a
      META HTTP-EQUIV element in the document itself; a caller that knows
      the right encoding should pass it in through this parameter.

    Pass a true value for select_default if you want the behaviour
    specified by RFC 1866 (the HTML 2.0 standard), which is to select the
    first item in a RADIO or multiple-selection SELECT control if none were
    selected in the HTML.  Most browsers (including Microsoft Internet
    Explorer (IE) and Netscape Navigator) instead leave all items
    unselected in these cases.  The W3C HTML 4.0 standard leaves this
    behaviour undefined in the case of multiple-selection SELECT controls,
    but insists that at least one RADIO button should be checked at all
    times, in contradiction to browser behaviour.

    There is a choice of parsers.  mechanize.XHTMLCompatibleFormParser
    (uses HTMLParser.HTMLParser) works best for XHTML; mechanize.FormParser
    (uses a bundled copy of sgmllib.SGMLParser), the default, works better
    for ordinary grubby HTML.  You can pass your own class in here as a
    hack to work around bad HTML, but at your own risk: there is no
    well-defined interface.
    """
    all_forms = _ParseFileEx(response, response.geturl(), *args, **kwds)
    # the first entry is the "global" form of controls outside any FORM
    return all_forms[1:]
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML and return a list of HTMLForm instances.

    mechanize.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with
      zero or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to
      submit the form will be that given in the BASE element if present,
      not that of the document)

    For the other arguments and further details, see ParseResponse.__doc__.
    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # the first entry is the "global" form of controls outside any FORM
    return all_forms[1:]
def _ParseFileEx(file, base_uri,
                 select_default=False,
                 ignore_errors=False,
                 form_parser_class=FormParser,
                 request_class=_request.Request,
                 entitydefs=None,
                 backwards_compat=True,
                 encoding=DEFAULT_ENCODING,
                 _urljoin=urlparse.urljoin,
                 _urlparse=urlparse.urlparse,
                 _urlunparse=urlparse.urlunparse,
                 ):
    """Parse file and build one HTMLForm per form the parser collected.

    The first form in the returned list is the parser's "global" form,
    holding the controls found outside any FORM element.  A true
    backwards_compat triggers a DeprecationWarning and is passed on to
    every HTMLForm.  ignore_errors is accepted but not used.  A ParseError
    raised while feeding the parser gets a base_uri attribute before it
    propagates.
    """
    if backwards_compat:
        deprecation("operating in backwards-compatibility mode", 1)
    parser = form_parser_class(entitydefs, encoding)
    while True:
        chunk = file.read(CHUNK)
        try:
            parser.feed(chunk)
        except ParseError:
            # sys.exc_info() instead of a bound name keeps this clause
            # valid on every Python version.
            sys.exc_info()[1].base_uri = base_uri
            raise
        # a short read is taken to mean end of input
        if len(chunk) != CHUNK:
            break
    parser.close()
    if parser.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = parser.base

    # Wrap the raw label dicts and index them by the id they point at.
    labels = []
    id_to_labels = {}
    for raw_label in parser.labels:
        label = Label(raw_label)
        labels.append(label)
        id_to_labels.setdefault(raw_label["for"], []).append(label)

    forms = []
    for (name, action, method, enctype), attrs, controls in parser.forms:
        if action is None:
            action = base_uri
        else:
            action = _urljoin(base_uri, action)
        # would be nice to make HTMLForm class (form builder) pluggable
        form = HTMLForm(
            action, method, enctype, name, attrs, request_class,
            forms, labels, id_to_labels, backwards_compat)
        form._urlparse = _urlparse
        form._urlunparse = _urlunparse
        for position, (ctl_type, ctl_name, ctl_attrs) in enumerate(controls):
            # index=position*10 allows ImageControl to return multiple
            # ordered pairs
            form.new_control(
                ctl_type, ctl_name, ctl_attrs,
                select_default=select_default, index=position*10)
        forms.append(form)
    for form in forms:
        form.fixup()
    return forms
class Label:
def __init__(self, attrs):
self.id = attrs.get("for")
self._text = attrs.get("__text").strip()
self._ctext = compress_text(self._text)
self.attrs = attrs
self._backwards_compat = False # maintained by HTMLForm
def __getattr__(self, name):
if name == "text":
if self._backwards_compat:
return self._text
else:
return self._ctext
return getattr(Label, name)
def __setattr__(self, name, value):
if name == "text":
# don't see any need for this, so make it read-only
raise AttributeError("text attribute is read-only")
self.__dict__[name] = value
def __str__(self):
return "