[
  {
    "path": ".github/workflows/codeql.yml",
    "content": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# You may wish to alter this file to override the set of languages analyzed,\n# or to provide custom queries or build logic.\n#\n# ******** NOTE ********\n# We have attempted to detect the languages in your repository. Please check\n# the `language` matrix defined below to confirm you have the correct set of\n# supported CodeQL languages.\n#\nname: \"CodeQL Advanced\"\n\non:\n  push:\n    branches: [ \"main\" ]\n  pull_request:\n    branches: [ \"main\" ]\n  schedule:\n    - cron: '22 9 * * 2'\n\njobs:\n  analyze:\n    name: Analyze (${{ matrix.language }})\n    # Runner size impacts CodeQL analysis time. To learn more, please see:\n    #   - https://gh.io/recommended-hardware-resources-for-running-codeql\n    #   - https://gh.io/supported-runners-and-hardware-resources\n    #   - https://gh.io/using-larger-runners (GitHub.com only)\n    # Consider using larger runners or machines with greater resources for possible analysis time improvements.\n    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}\n    permissions:\n      # required for all workflows\n      security-events: write\n\n      # required to fetch internal or private CodeQL packs\n      packages: read\n\n      # only required for workflows in private repositories\n      actions: read\n      contents: read\n\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        - language: python\n          build-mode: none\n        # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'\n        # Use `c-cpp` to analyze code written in C, C++ or both\n        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both\n        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both\n        # 
To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,\n        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.\n        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how\n        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages\n    steps:\n    - name: Checkout repository\n      uses: actions/checkout@v4\n\n    # Initializes the CodeQL tools for scanning.\n    - name: Initialize CodeQL\n      uses: github/codeql-action/init@v3\n      with:\n        languages: ${{ matrix.language }}\n        build-mode: ${{ matrix.build-mode }}\n        # If you wish to specify custom queries, you can do so here or in a config file.\n        # By default, queries listed here will override any specified in a config file.\n        # Prefix the list here with \"+\" to use these queries and those in the config file.\n\n        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs\n        # queries: security-extended,security-and-quality\n\n    # If the analyze step fails for one of the languages you are analyzing with\n    # \"We were unable to automatically build your code\", modify the matrix above\n    # to set the build mode to \"manual\" for that language. 
Then modify this step\n    # to build your code.\n    # ℹ️ Command-line programs to run using the OS shell.\n    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun\n    - if: matrix.build-mode == 'manual'\n      shell: bash\n      run: |\n        echo 'If you are using a \"manual\" build mode for one or more of the' \\\n          'languages you are analyzing, replace this with the commands to build' \\\n          'your code, for example:'\n        echo '  make bootstrap'\n        echo '  make release'\n        exit 1\n\n    - name: Perform CodeQL Analysis\n      uses: github/codeql-action/analyze@v3\n      with:\n        category: \"/language:${{matrix.language}}\"\n"
  },
  {
    "path": ".github/workflows/lint.yml",
    "content": "name: flake8 Lint\n\non: [push, pull_request]\n\njobs:\n  flake8-lint:\n    runs-on: ubuntu-latest\n    name: Lint\n    steps:\n      - name: Check out source repository\n        uses: actions/checkout@v3\n      - name: Set up Python environment\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.11\"\n      - name: flake8 Lint\n        uses: py-actions/flake8@v2\n        with:\n          ignore: \"E266,W293,W504,E501\"\n          "
  },
  {
    "path": ".github/workflows/pytest.yml",
    "content": "name: Pytest CI\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n    branches: [ main ]\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n\n    steps:\n      # 第一步：检出代码\n      - name: Checkout code\n        uses: actions/checkout@v3\n\n      # 第二步：设置 Miniconda\n      - name: Set up Miniconda\n        uses: conda-incubator/setup-miniconda@v2\n        with:\n          auto-update-conda: true            # 自动更新 Conda\n          python-version: '3.9'              # 指定 Python 版\n          activate-environment: phishpedia\n\n      # 保存cache\n      - name: Cache Conda packages and pip cache\n        uses: actions/cache@v3\n        with:\n          path: |\n            ~/.conda/pkgs               # 缓存 Conda 包\n            ~/.cache/pip                # 缓存 pip 包\n            phishpedia/lib/python3.9/site-packages  # 可选：缓存虚拟环境的 site-packages\n          key: ${{ runner.os }}-conda-${{ hashFiles('**/environment.yml', '**/requirements.txt') }}\n          restore-keys: |\n            ${{ runner.os }}-conda-\n\n      # 第三步：升级 pip\n      - name: Upgrade pip\n        run: |\n          python -m pip install --upgrade pip\n        \n\n      # 第四步：克隆 Phishpedia 仓库并运行 setup.sh\n      - name: Clone Phishpedia repo and run setup.sh\n        run: |\n          git clone https://github.com/lindsey98/Phishpedia.git\n          cd Phishpedia\n          chmod +x ./setup.sh\n          ./setup.sh\n        \n\n      # 第五步：安装项目依赖和 pytest\n      - name: Install dependencies and pytest\n        run: |\n          \n          conda run -n phishpedia pip install pytest\n          conda run -n phishpedia pip install validators\n\n\n      # 步骤 6：运行 Pytest 测试\n      - name: Run Pytest\n        run: |\n        \n          conda run -n phishpedia pytest tests/test_logo_matching.py\n          conda run -n phishpedia pytest tests/test_logo_recog.py\n          conda run -n phishpedia pytest tests/test_phishpedia.py\n"
  },
  {
    "path": ".gitignore",
    "content": "*.zip\n*.pkl\n*.pth*\nvenv/\n__pycache__/"
  },
  {
    "path": "LICENSE",
    "content": "Creative Commons Legal Code\n\nCC0 1.0 Universal\n\n    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE\n    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN\n    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS\n    INFORMATION ON AN \"AS-IS\" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES\n    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS\n    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM\n    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED\n    HEREUNDER.\n\nStatement of Purpose\n\nThe laws of most jurisdictions throughout the world automatically confer\nexclusive Copyright and Related Rights (defined below) upon the creator\nand subsequent owner(s) (each and all, an \"owner\") of an original work of\nauthorship and/or a database (each, a \"Work\").\n\nCertain owners wish to permanently relinquish those rights to a Work for\nthe purpose of contributing to a commons of creative, cultural and\nscientific works (\"Commons\") that the public can reliably and without fear\nof later claims of infringement build upon, modify, incorporate in other\nworks, reuse and redistribute as freely as possible in any form whatsoever\nand for any purposes, including without limitation commercial purposes.\nThese owners may contribute to the Commons to promote the ideal of a free\nculture and the further production of creative, cultural and scientific\nworks, or to gain reputation or greater distribution for their Work in\npart through the use and efforts of others.\n\nFor these and/or other purposes and motivations, and without any\nexpectation of additional consideration or compensation, the person\nassociating CC0 with a Work (the \"Affirmer\"), to the extent that he or she\nis an owner of Copyright and Related Rights in the Work, voluntarily\nelects to apply CC0 to the Work and publicly distribute the Work under its\nterms, with knowledge of his or her Copyright 
and Related Rights in the\nWork and the meaning and intended legal effect of CC0 on those rights.\n\n1. Copyright and Related Rights. A Work made available under CC0 may be\nprotected by copyright and related or neighboring rights (\"Copyright and\nRelated Rights\"). Copyright and Related Rights include, but are not\nlimited to, the following:\n\n  i. the right to reproduce, adapt, distribute, perform, display,\n     communicate, and translate a Work;\n ii. moral rights retained by the original author(s) and/or performer(s);\niii. publicity and privacy rights pertaining to a person's image or\n     likeness depicted in a Work;\n iv. rights protecting against unfair competition in regards to a Work,\n     subject to the limitations in paragraph 4(a), below;\n  v. rights protecting the extraction, dissemination, use and reuse of data\n     in a Work;\n vi. database rights (such as those arising under Directive 96/9/EC of the\n     European Parliament and of the Council of 11 March 1996 on the legal\n     protection of databases, and under any national implementation\n     thereof, including any amended or successor version of such\n     directive); and\nvii. other similar, equivalent or corresponding rights throughout the\n     world based on applicable law or treaty, and any national\n     implementations thereof.\n\n2. Waiver. 
To the greatest extent permitted by, but not in contravention\nof, applicable law, Affirmer hereby overtly, fully, permanently,\nirrevocably and unconditionally waives, abandons, and surrenders all of\nAffirmer's Copyright and Related Rights and associated claims and causes\nof action, whether now known or unknown (including existing as well as\nfuture claims and causes of action), in the Work (i) in all territories\nworldwide, (ii) for the maximum duration provided by applicable law or\ntreaty (including future time extensions), (iii) in any current or future\nmedium and for any number of copies, and (iv) for any purpose whatsoever,\nincluding without limitation commercial, advertising or promotional\npurposes (the \"Waiver\"). Affirmer makes the Waiver for the benefit of each\nmember of the public at large and to the detriment of Affirmer's heirs and\nsuccessors, fully intending that such Waiver shall not be subject to\nrevocation, rescission, cancellation, termination, or any other legal or\nequitable action to disrupt the quiet enjoyment of the Work by the public\nas contemplated by Affirmer's express Statement of Purpose.\n\n3. Public License Fallback. Should any part of the Waiver for any reason\nbe judged legally invalid or ineffective under applicable law, then the\nWaiver shall be preserved to the maximum extent permitted taking into\naccount Affirmer's express Statement of Purpose. 
In addition, to the\nextent the Waiver is so judged Affirmer hereby grants to each affected\nperson a royalty-free, non transferable, non sublicensable, non exclusive,\nirrevocable and unconditional license to exercise Affirmer's Copyright and\nRelated Rights in the Work (i) in all territories worldwide, (ii) for the\nmaximum duration provided by applicable law or treaty (including future\ntime extensions), (iii) in any current or future medium and for any number\nof copies, and (iv) for any purpose whatsoever, including without\nlimitation commercial, advertising or promotional purposes (the\n\"License\"). The License shall be deemed effective as of the date CC0 was\napplied by Affirmer to the Work. Should any part of the License for any\nreason be judged legally invalid or ineffective under applicable law, such\npartial invalidity or ineffectiveness shall not invalidate the remainder\nof the License, and in such case Affirmer hereby affirms that he or she\nwill not (i) exercise any of his or her remaining Copyright and Related\nRights in the Work or (ii) assert any associated claims and causes of\naction with respect to the Work, in either case contrary to Affirmer's\nexpress Statement of Purpose.\n\n4. Limitations and Disclaimers.\n\n a. No trademark or patent rights held by Affirmer are waived, abandoned,\n    surrendered, licensed or otherwise affected by this document.\n b. Affirmer offers the Work as-is and makes no representations or\n    warranties of any kind concerning the Work, express, implied,\n    statutory or otherwise, including without limitation warranties of\n    title, merchantability, fitness for a particular purpose, non\n    infringement, or the absence of latent or other defects, accuracy, or\n    the present or absence of errors, whether or not discoverable, all to\n    the greatest extent permissible under applicable law.\n c. 
Affirmer disclaims responsibility for clearing rights of other persons\n    that may apply to the Work or any use thereof, including without\n    limitation any person's Copyright and Related Rights in the Work.\n    Further, Affirmer disclaims responsibility for obtaining any necessary\n    consents, permissions or other rights required for any use of the\n    Work.\n d. Affirmer understands and acknowledges that Creative Commons is not a\n    party to this document and has no duty or obligation with respect to\n    this CC0 or use of the Work.\n"
  },
  {
    "path": "Plugin_for_Chrome/README.md",
    "content": "# Plugin_for_Chrome\n\n## Project Overview\n\n`Plugin_for_Chrome` is a Chrome extension project designed to detect phishing websites. \nThe extension automatically retrieves the current webpage's URL and a screenshot when the user presses a predefined hotkey or clicks the extension button, then sends this information to the server for phishing detection. The server utilizes the Flask framework, loads the Phishpedia model for identification, and returns the detection results.\n\n## Directory Structure\n\n```\nPlugin_for_Chrome/\n├── client/\n│   ├── background.js        # Handles the extension's background logic, including hotkeys and button click events.\n│   ├── manifest.json        # Configuration file for the Chrome extension.\n│   └── popup/\n│       ├── popup.html        # HTML file for the extension's popup page.\n│       ├── popup.js          # JavaScript file for the extension's popup page.\n│       └── popup.css         # CSS file for the extension's popup page.\n└── server/\n    └── app.py                # Main program for the Flask server, handling client requests and invoking the Phishpedia model for detection.\n```\n\n## Installation and Usage\n\n### Frontend\n\n1. Open the Chrome browser and navigate to `chrome://extensions/`.\n2. Enable Developer Mode.\n3. Click on \"Load unpacked\" and select the `Plugin_for_Chrome/client` directory (the one containing `manifest.json`).\n\n### Backend\n\n1. Run the Flask server:\n    ```bash\n    pixi run python -m Plugin_for_Chrome.server.app\n    ```\n\n## Using the Extension\n\nIn the Chrome browser, press the hotkey `Ctrl+Shift+H` (`Command+Shift+H` on macOS) or click the extension button.\nThe extension will automatically capture the current webpage's URL and a screenshot, then send them to the server for analysis.\nThe server will return the detection results, and the extension will display whether the webpage is a phishing site along with the corresponding legitimate website.\n\n## Notes\n\nEnsure that the server is running locally and listening on the default port 5000.\nThe extension sends its requests to `http://localhost:5000`, so the server must run on the same machine as the browser.\n\n## Contributing\n\nFeel free to submit issues and contribute code!\n\n"
  },
  {
    "path": "Plugin_for_Chrome/client/background.js",
    "content": "// 处理截图和URL获取\nasync function captureTabInfo(tab) {\n  try {\n    // 获取截图\n    const screenshot = await chrome.tabs.captureVisibleTab(null, {\n      format: 'png'\n    });\n    \n    // 获取当前URL\n    const url = tab.url;\n    \n    // 发送到服务器进行分析\n    const response = await fetch('http://localhost:5000/analyze', {\n      method: 'POST',\n      headers: {\n        'Content-Type': 'application/json',\n      },\n      body: JSON.stringify({\n        url: url,\n        screenshot: screenshot\n      })\n    });\n    \n    const result = await response.json();\n    \n    // 将结果发送到popup\n    chrome.runtime.sendMessage({\n      type: 'analysisResult',\n      data: result\n    });\n    \n  } catch (error) {\n    console.error('Error capturing tab info:', error);\n    chrome.runtime.sendMessage({\n      type: 'error',\n      data: error.message\n    });\n  }\n}\n\n// 监听快捷键命令\nchrome.commands.onCommand.addListener(async (command) => {\n  if (command === '_execute_action') {\n    const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });\n    if (tab) {\n      await captureTabInfo(tab);\n    }\n  }\n});\n\n// 监听来自popup的消息\nchrome.runtime.onMessage.addListener((request, sender, sendResponse) => {\n  if (request.type === 'analyze') {\n    chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {\n      if (tabs[0]) {\n        await captureTabInfo(tabs[0]);\n      }\n    });\n  }\n  return true;\n});"
  },
  {
    "path": "Plugin_for_Chrome/client/manifest.json",
    "content": "{\n  \"manifest_version\": 3,\n  \"name\": \"Phishing Detector\",\n  \"version\": \"1.0\",\n  \"description\": \"Detect phishing websites using screenshot and URL analysis\",\n  \"permissions\": [\n    \"activeTab\",\n    \"scripting\",\n    \"storage\",\n    \"tabs\"\n  ],\n  \"host_permissions\": [\n    \"http://localhost:5000/*\"\n  ],\n  \"action\": {\n    \"default_popup\": \"popup/popup.html\"\n  },\n  \"background\": {\n    \"service_worker\": \"background.js\"\n  },\n  \"commands\": {\n    \"_execute_action\": {\n      \"suggested_key\": {\n        \"default\": \"Ctrl+Shift+H\",\n        \"mac\": \"Command+Shift+H\"\n      },\n      \"description\": \"Analyze current page for phishing\"\n    }\n  }\n}"
  },
  {
    "path": "Plugin_for_Chrome/client/popup/popup.css",
    "content": ".container {\n    width: 300px;\n    padding: 16px;\n  }\n  \n  h1 {\n    font-size: 18px;\n    margin-bottom: 16px;\n  }\n  \n  button {\n    width: 100%;\n    padding: 8px;\n    background-color: #4CAF50;\n    color: white;\n    border: none;\n    border-radius: 4px;\n    cursor: pointer;\n    margin-bottom: 16px;\n  }\n  \n  button:hover {\n    background-color: #45a049;\n  }\n  \n  .hidden {\n    display: none;\n  }\n  \n  #loading {\n    text-align: center;\n    margin: 16px 0;\n  }\n  \n  #result {\n    margin-top: 16px;\n  }\n  \n  .safe {\n    color: #4CAF50;\n  }\n  \n  .dangerous {\n    color: #f44336;\n  }\n  \n  .error-message {\n    color: #f44336;\n  }"
  },
  {
    "path": "Plugin_for_Chrome/client/popup/popup.html",
    "content": "<!DOCTYPE html>\n<html>\n<head>\n  <meta charset=\"UTF-8\">\n  <link rel=\"stylesheet\" href=\"popup.css\">\n</head>\n<body>\n  <div class=\"container\">\n    <h1>Phishing Detector</h1>\n    <button id=\"analyzeBtn\">分析当前页面</button>\n    <div id=\"loading\" class=\"hidden\">\n      分析中...\n    </div>\n    <div id=\"result\" class=\"hidden\">\n      <h2>分析结果:</h2>\n      <div id=\"status\"></div>\n      <div id=\"legitUrl\" class=\"hidden\">\n        <h3>对应的正版网站:</h3>\n        <a id=\"legitUrlLink\" href=\"#\" target=\"_blank\"></a>\n      </div>\n    </div>\n    <div id=\"error\" class=\"hidden\">\n      <p class=\"error-message\"></p>\n    </div>\n  </div>\n  <script src=\"popup.js\"></script>\n</body>\n</html>"
  },
  {
    "path": "Plugin_for_Chrome/client/popup/popup.js",
    "content": "document.addEventListener('DOMContentLoaded', () => {\n  const analyzeBtn = document.getElementById('analyzeBtn');\n  const loading = document.getElementById('loading');\n  const result = document.getElementById('result');\n  const status = document.getElementById('status');\n  const legitUrl = document.getElementById('legitUrl');\n  const legitUrlLink = document.getElementById('legitUrlLink');\n  const error = document.getElementById('error');\n  \n  // 点击分析按钮\n  analyzeBtn.addEventListener('click', () => {\n    // 显示加载状态\n    loading.classList.remove('hidden');\n    result.classList.add('hidden');\n    error.classList.add('hidden');\n    \n    // 发送消息给background script\n    chrome.runtime.sendMessage({\n      type: 'analyze'\n    });\n  });\n  \n  // 监听来自background的消息\n  chrome.runtime.onMessage.addListener((message) => {\n    loading.classList.add('hidden');\n    \n    if (message.type === 'analysisResult') {\n      result.classList.remove('hidden');\n      \n      if (message.data.isPhishing) {\n        status.innerHTML = '<span class=\"dangerous\">⚠️ 警告：这可能是一个钓鱼网站！</span>';\n        if (message.data.legitUrl) {\n          legitUrl.classList.remove('hidden');\n          legitUrlLink.href = message.data.legitUrl;\n          legitUrlLink.textContent = message.data.brand;\n        }\n      } else {\n        status.innerHTML = '<span class=\"safe\">✓ 这是一个安全的网站</span>';\n        legitUrl.classList.add('hidden');\n      }\n    } else if (message.type === 'error') {\n      error.classList.remove('hidden');\n      error.querySelector('.error-message').textContent = message.data;\n    }\n  });\n});"
  },
  {
    "path": "Plugin_for_Chrome/server/app.py",
    "content": "from flask import Flask, request, jsonify\nfrom flask_cors import CORS\nimport base64\nfrom io import BytesIO\nfrom PIL import Image\nfrom datetime import datetime\nimport os\nfrom phishpedia import PhishpediaWrapper, result_file_write\n\napp = Flask(__name__)\nCORS(app)\n\n# 在创建应用时初始化模型\nwith app.app_context():\n    current_dir = os.path.dirname(os.path.realpath(__file__))\n    log_dir = os.path.join(current_dir, 'plugin_logs')\n    os.makedirs(log_dir, exist_ok=True)\n    phishpedia_cls = PhishpediaWrapper()\n\n\n@app.route('/analyze', methods=['POST'])\ndef analyze():\n    try:\n        print('Request received')\n        data = request.get_json()\n        url = data.get('url')\n        screenshot_data = data.get('screenshot')\n\n        # 解码Base64图片数据\n        image_data = base64.b64decode(screenshot_data.split(',')[1])\n        image = Image.open(BytesIO(image_data))\n        screenshot_path = 'temp_screenshot.png'\n        image.save(screenshot_path, format='PNG')\n\n        # 调用Phishpedia模型进行识别\n        phish_category, pred_target, matched_domain, \\\n            plotvis, siamese_conf, pred_boxes, \\\n            logo_recog_time, logo_match_time = phishpedia_cls.test_orig_phishpedia(url, screenshot_path, None)\n\n        # 添加结果处理逻辑\n        result = {\n            \"isPhishing\": bool(phish_category),\n            \"brand\": pred_target if pred_target else \"unknown\",\n            \"legitUrl\": f\"https://{matched_domain[0]}\" if matched_domain else \"unknown\",\n            \"confidence\": float(siamese_conf) if siamese_conf is not None else 0.0\n        }\n\n        # 记录日志\n        today = datetime.now().strftime('%Y%m%d')\n        log_file_path = os.path.join(log_dir, f'{today}_results.txt')\n\n        try:\n            with open(log_file_path, \"a+\", encoding='ISO-8859-1') as f:\n                result_file_write(f, current_dir, url, phish_category, pred_target,\n                                  matched_domain if matched_domain else 
[\"unknown\"],\n                                  siamese_conf if siamese_conf is not None else 0.0,\n                                  logo_recog_time, logo_match_time)\n        except UnicodeError:\n            with open(log_file_path, \"a+\", encoding='utf-8') as f:\n                result_file_write(f, current_dir, url, phish_category, pred_target,\n                                  matched_domain if matched_domain else [\"unknown\"],\n                                  siamese_conf if siamese_conf is not None else 0.0,\n                                  logo_recog_time, logo_match_time)\n\n        if os.path.exists(screenshot_path):\n            os.remove(screenshot_path)\n\n        return jsonify(result)\n\n    except Exception as e:\n        print(f\"Error in analyze: {str(e)}\")\n        log_error_path = os.path.join(log_dir, 'log_error.txt')\n        with open(log_error_path, \"a+\", encoding='utf-8') as f:\n            f.write(f'{datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")} - {str(e)}\\n')\n        return jsonify(\"ERROR\"), 500\n\n\nif __name__ == '__main__':\n    app.run(host='0.0.0.0', port=5000, debug=False)\n"
  },
  {
    "path": "README.md",
    "content": "# Phishpedia: A Hybrid Deep Learning Based Approach to Visually Identify Phishing Webpages\n\n<div align=\"center\">\n\n![Dialogues](https://img.shields.io/badge/Protected_Brands_Size-277-green?style=flat-square)\n![Dialogues](https://img.shields.io/badge/Phishing_Benchmark_Size-30k-green?style=flat-square)\n\n</div>\n<p align=\"center\">\n  <a href=\"https://www.usenix.org/conference/usenixsecurity21/presentation/lin\">Paper</a> •\n  <a href=\"https://sites.google.com/view/phishpedia-site/\">Website</a> •\n  <a href=\"https://www.youtube.com/watch?v=ZQOH1RW5DmY\">Video</a> •\n   <a href=\"https://drive.google.com/file/d/12ypEMPRQ43zGRqHGut0Esq2z5en0DH4g/view?usp=drive_link\">Dataset</a> •\n  <a href=\"#citation\">Citation</a>\n</p>\n\n- This is the official implementation of \"Phishpedia: A Hybrid Deep Learning Based Approach to Visually Identify Phishing Webpages\" USENIX'21 [link to paper](https://www.usenix.org/conference/usenixsecurity21/presentation/lin), [link to our website](https://sites.google.com/view/phishpedia-site/), [link to our dataset](https://drive.google.com/file/d/12ypEMPRQ43zGRqHGut0Esq2z5en0DH4g/view?usp=drive_link).\n\n- Existing reference-based phishing detectors:\n  - :x: Lack of **interpretability**, only give a binary decision (legit or phish)\n  - :x: **Not robust against distribution shift**, because the classifier is biased towards the phishing training set\n  - :x: **Lack of a large-scale phishing benchmark** dataset\n- The contributions of our paper:\n  - :white_check_mark: We propose a phishing identification system Phishpedia, which has high identification accuracy and low runtime overhead, outperforming the relevant state-of-the-art identification approaches.\n  - :white_check_mark: We are the first to propose to use a **consistency-based method** for phishing detection, in place of the traditional classification-based method. We investigate the consistency between the webpage domain and its brand intention. The detected brand intention provides a **visual explanation** for the phishing decision.\n  - :white_check_mark: Phishpedia is **NOT trained on any phishing dataset**, addressing the potential test-time distribution shift problem.\n  - :white_check_mark: We release a **30k phishing benchmark dataset**; each website is annotated with its URL, HTML, screenshot, and target brand: https://drive.google.com/file/d/12ypEMPRQ43zGRqHGut0Esq2z5en0DH4g/view?usp=drive_link.\n  - :white_check_mark: We set up a **phishing monitoring system**, investigating emerging domains fed from CertStream, and we have discovered 1,704 real phishing webpages, out of which 1,133 are zero-days not reported by industrial antivirus engines (VirusTotal).\n\n\n## Framework\n\n<img src=\"./datasets/overview.png\" style=\"width:2000px;height:350px\"/>\n\n`Input`: A URL and its screenshot `Output`: Phish/Benign, Phishing target\n\n- Step 1: Enter <b>Deep Object Detection Model</b>, get predicted logos and inputs (inputs are not used for later prediction, just for explanation)\n\n- Step 2: Enter <b>Deep Siamese Model</b>\n  - If the Siamese model reports no target, `Return  Benign, None`\n  - Otherwise (the Siamese model reports a target), `Return Phish, Phishing target`\n\n\n## Setup\n\nPrerequisite: [Pixi installed](https://pixi.sh/latest/)\n\nFor Linux/Mac,\n\n  ```bash\n  export KMP_DUPLICATE_LIB_OK=TRUE\n  git clone https://github.com/lindsey98/Phishpedia.git\n  cd Phishpedia\n  pixi install\n  chmod +x setup.sh\n  ./setup.sh\n  ```\n\nFor Windows, in PowerShell,\n\n  ```bash\n  git clone https://github.com/lindsey98/Phishpedia.git\n  cd Phishpedia\n  pixi install\n  setup.bat\n  ```\n\n## Running Phishpedia from Command Line\n\n```bash\npixi run python phishpedia.py --folder <folder you want to test e.g. ./datasets/test_sites>\n```\n\nThe testing folder should be in the structure of:\n\n```\ntest_site_1\n|__ info.txt (Write the URL)\n|__ shot.png (Save the screenshot)\ntest_site_2\n|__ info.txt (Write the URL)\n|__ shot.png (Save the screenshot)\n......\n```\n\n## Running Phishpedia as a GUI tool (web-browser-based)\n  \nSee [WEBtool/](WEBtool/)\n\n## Install Phishpedia as a Chrome plugin\n  \nSee [Plugin_for_Chrome/](Plugin_for_Chrome/)\n\n\n## Project structure\n\n```\n- models/\n|___ rcnn_bet365.pth\n|___ faster_rcnn.yaml\n|___ resnetv2_rgb_new.pth.tar\n|___ expand_targetlist/\n  |___ Adobe/\n  |___ Amazon/\n  |___ ......\n|___ domain_map.pkl\n- logo_recog.py: Deep Object Detection Model\n- logo_matching.py: Deep Siamese Model\n- configs.yaml: Configuration file\n- phishpedia.py: Main script\n```\n\n## Miscellaneous\n- In our paper, we also implement several phishing detection and identification baselines, see [here](https://github.com/lindsey98/PhishingBaseline)\n- The logo targetlist described in our paper includes 181 brands; we have further expanded the targetlist to include 277 brands in this code repository\n- For the phish discovery experiment, we obtain the feed from [Certstream phish_catcher](https://github.com/x0rz/phishing_catcher) and lower the score threshold to 40 to process more suspicious websites; readers can refer to their repo for details\n- We use Scrapy for website crawling\n\n## Citation\n\nIf you find our work useful in your research, please consider citing our paper by:\n\n```bibtex\n@inproceedings{lin2021phishpedia,\n  title={Phishpedia: A Hybrid Deep Learning Based Approach to Visually Identify Phishing Webpages},\n  author={Lin, Yun and Liu, Ruofan and Divakaran, Dinil Mon and Ng, Jun Yang and Chan, Qing Zhou and Lu, Yiwen and Si, Yuxuan and Zhang, Fan and Dong, Jin Song},\n  booktitle={30th $\\{$USENIX$\\}$ Security Symposium ($\\{$USENIX$\\}$ Security 21)},\n  year={2021}\n}\n```\n\n## Contacts\n\nIf you have any issues running our code, you can raise an issue or send an email to liu.ruofan16@u.nus.edu, lin_yun@sjtu.edu.cn, and dcsdjs@nus.edu.sg\n"
  },
  {
    "path": "WEBtool/app.py",
    "content": "from flask import Flask, request, jsonify\nfrom flask_cors import CORS\nimport base64\nfrom io import BytesIO\nfrom PIL import Image\nfrom datetime import datetime\nimport os\nfrom phishpedia import PhishpediaWrapper, result_file_write\n\napp = Flask(__name__)\nCORS(app)\n\n# 在创建应用时初始化模型\nwith app.app_context():\n    current_dir = os.path.dirname(os.path.realpath(__file__))\n    log_dir = os.path.join(current_dir, 'plugin_logs')\n    os.makedirs(log_dir, exist_ok=True)\n    phishpedia_cls = PhishpediaWrapper()\n\n\n@app.route('/analyze', methods=['POST'])\ndef analyze():\n    try:\n        print('Request received')\n        data = request.get_json()\n        url = data.get('url')\n        screenshot_data = data.get('screenshot')\n\n        # 解码Base64图片数据\n        image_data = base64.b64decode(screenshot_data.split(',')[1])\n        image = Image.open(BytesIO(image_data))\n        screenshot_path = 'temp_screenshot.png'\n        image.save(screenshot_path, format='PNG')\n\n        # 调用Phishpedia模型进行识别\n        phish_category, pred_target, matched_domain, \\\n            plotvis, siamese_conf, pred_boxes, \\\n            logo_recog_time, logo_match_time = phishpedia_cls.test_orig_phishpedia(url, screenshot_path, None)\n\n        # 添加结果处理逻辑\n        result = {\n            \"isPhishing\": bool(phish_category),\n            \"brand\": pred_target if pred_target else \"unknown\",\n            \"legitUrl\": f\"https://{matched_domain[0]}\" if matched_domain else \"unknown\",\n            \"confidence\": float(siamese_conf) if siamese_conf is not None else 0.0\n        }\n\n        # 记录日志\n        today = datetime.now().strftime('%Y%m%d')\n        log_file_path = os.path.join(log_dir, f'{today}_results.txt')\n\n        try:\n            with open(log_file_path, \"a+\", encoding='ISO-8859-1') as f:\n                result_file_write(f, current_dir, url, phish_category, pred_target,\n                                  matched_domain if matched_domain else 
[\"unknown\"],\n                                  siamese_conf if siamese_conf is not None else 0.0,\n                                  logo_recog_time, logo_match_time)\n        except UnicodeError:\n            with open(log_file_path, \"a+\", encoding='utf-8') as f:\n                result_file_write(f, current_dir, url, phish_category, pred_target,\n                                  matched_domain if matched_domain else [\"unknown\"],\n                                  siamese_conf if siamese_conf is not None else 0.0,\n                                  logo_recog_time, logo_match_time)\n\n        if os.path.exists(screenshot_path):\n            os.remove(screenshot_path)\n\n        return jsonify(result)\n\n    except Exception as e:\n        print(f\"Error in analyze: {str(e)}\")\n        log_error_path = os.path.join(log_dir, 'log_error.txt')\n        with open(log_error_path, \"a+\", encoding='utf-8') as f:\n            f.write(f'{datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")} - {str(e)}\\n')\n        return jsonify(\"ERROR\"), 500\n\n\nif __name__ == '__main__':\n    app.run(host='0.0.0.0', port=5000, debug=False)\n"
  },
  {
    "path": "WEBtool/phishpedia_web.py",
    "content": "import os\nimport shutil\nfrom flask import request, Flask, jsonify, render_template, send_from_directory\nfrom flask_cors import CORS\nfrom utils_web import allowed_file, convert_to_base64, domain_map_add, domain_map_delete, check_port_inuse, initial_upload_folder\nfrom configs import load_config\nfrom phishpedia import PhishpediaWrapper\n\nphishpedia_cls = None\n\n# flask for API server\napp = Flask(__name__)\ncors = CORS(app, supports_credentials=True)\napp.config['CORS_HEADERS'] = 'Content-Type'\napp.config['UPLOAD_FOLDER'] = 'static/uploads'\napp.config['FILE_TREE_ROOT'] = '../models/expand_targetlist'  # 主目录路径\napp.config['DOMAIN_MAP_PATH'] = '../models/domain_map.pkl'\n\n\n@app.route('/')\ndef index():\n    \"\"\"渲染主页面\"\"\"\n    return render_template('index.html')\n\n\n@app.route('/upload', methods=['POST'])\ndef upload_file():\n    \"\"\"处理文件上传请求\"\"\"\n    if 'image' not in request.files:\n        return jsonify({'error': 'No file part'}), 400\n    file = request.files['image']\n    \n    if file.filename == '':\n        return jsonify({'error': 'No selected file'}), 400\n\n    if file and allowed_file(file.filename):\n        filename = file.filename\n        if filename.count('.') > 1:\n            return jsonify({'error': 'Invalid file name'}), 400\n        elif any(sep in filename for sep in (os.sep, os.altsep)):\n            return jsonify({'error': 'Invalid file name'}), 400\n        elif '..' 
in filename:\n            return jsonify({'error': 'Invalid file name'}), 400\n        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)\n        file_path = os.path.normpath(file_path)\n        if not file_path.startswith(app.config['UPLOAD_FOLDER']):\n            return jsonify({'error': 'Invalid file path'}), 400\n        file.save(file_path)\n        return jsonify({'success': True, 'imageUrl': f'/uploads/{filename}'}), 200\n\n    return jsonify({'error': 'Invalid file type'}), 400\n\n\n@app.route('/uploads/<filename>')\ndef uploaded_file(filename):\n    \"\"\"提供上传文件的访问路径\"\"\"\n    return send_from_directory(app.config['UPLOAD_FOLDER'], filename)\n\n\n@app.route('/clear_upload', methods=['POST'])\ndef delete_image():\n    data = request.get_json()\n    image_url = data.get('imageUrl')\n\n    if not image_url:\n        return jsonify({'success': False, 'error': 'No image URL provided'}), 400\n\n    try:\n        # 假设 image_url 是相对于静态目录的路径\n        filename = image_url.split('/')[-1]\n        image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)\n        image_path = os.path.normpath(image_path)\n        if not image_path.startswith(app.config['UPLOAD_FOLDER']):\n            return jsonify({'success': False, 'error': 'Invalid file path'}), 400\n        os.remove(image_path)\n        return jsonify({'success': True}), 200\n    except Exception:\n        return jsonify({'success': False}), 500\n\n\n@app.route('/detect', methods=['POST'])\ndef detect():\n    data = request.json\n    url = data.get('url', '')\n    imageUrl = data.get('imageUrl', '')\n    \n    filename = imageUrl.split('/')[-1]\n    screenshot_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)\n    screenshot_path = os.path.normpath(screenshot_path)\n    if not screenshot_path.startswith(app.config['UPLOAD_FOLDER']):\n        return jsonify({'success': False, 'error': 'Invalid file path'}), 400\n\n    phish_category, pred_target, matched_domain, plotvis, 
siamese_conf, _, logo_recog_time, logo_match_time = phishpedia_cls.test_orig_phishpedia(\n        url, screenshot_path, None)\n    \n    # 处理检测结果\n    if phish_category == 0:\n        if pred_target is None:\n            result = 'Unknown'\n        else:\n            result = 'Benign'\n    else:\n        result = 'Phishing'\n\n    plot_base64 = convert_to_base64(plotvis)\n\n    # 返回检测结果\n    result = {\n        'result': result,  # 检测结果\n        'matched_brand': pred_target,  # 匹配到的品牌\n        'correct_domain': matched_domain,  # 正确的域名\n        'confidence': round(float(siamese_conf), 3),  # 置信度，直接返回百分比\n        'detection_time': round(float(logo_recog_time) + float(logo_match_time), 3),  # 检测时间\n        'logo_extraction': plot_base64  # logo标注结果，直接返回图像\n    }\n    return jsonify(result)\n\n\n@app.route('/get-directory', methods=['GET'])\ndef get_file_tree():\n    \"\"\"\n    获取主目录的文件树\n    \"\"\"\n    def build_file_tree(path):\n        tree = []\n        try:\n            for entry in os.listdir(path):\n                entry_path = os.path.join(path, entry)\n                entry_path = os.path.normpath(entry_path)\n                if not entry_path.startswith(path):\n                    continue\n                if os.path.isdir(entry_path):\n                    tree.append({\n                        'name': entry,\n                        'type': 'directory',\n                        'children': build_file_tree(entry_path)  # 递归子目录\n                    })\n                elif entry.lower().endswith(('.png', '.jpeg', '.jpg')):\n                    tree.append({\n                        'name': entry,\n                        'type': 'file'\n                    })\n                else:\n                    continue\n        except PermissionError:\n            pass  # 忽略权限错误\n        return sorted(tree, key=lambda x: x['name'].lower())  # 按 name 字段排序，不区分大小写\n\n    root_path = app.config['FILE_TREE_ROOT']\n    if not os.path.exists(root_path):\n        return 
jsonify({'error': 'Root directory does not exist'}), 404\n\n    file_tree = build_file_tree(root_path)\n    return jsonify({'file_tree': file_tree}), 200\n\n\n@app.route('/view-file', methods=['GET'])\ndef view_file():\n    file_name = request.args.get('file')\n    file_path = os.path.join(app.config['FILE_TREE_ROOT'], file_name)\n    file_path = os.path.normpath(file_path)\n    if not file_path.startswith(app.config['FILE_TREE_ROOT']):\n        return jsonify({'error': 'Invalid file path'}), 400\n\n    if not os.path.exists(file_path):\n        return jsonify({'error': 'File not found'}), 404\n\n    if file_name.lower().endswith(('.png', '.jpeg', '.jpg')):\n        return send_from_directory(app.config['FILE_TREE_ROOT'], file_name)\n    \n    return jsonify({'error': 'Unsupported file type'}), 400\n\n\n@app.route('/add-logo', methods=['POST'])\ndef add_logo():\n    if 'logo' not in request.files:\n        return jsonify({'success': False, 'error': 'No file part'}), 400\n\n    logo = request.files['logo']\n    if logo.filename == '':\n        return jsonify({'success': False, 'error': 'No selected file'}), 400\n\n    if logo and allowed_file(logo.filename):\n        directory = request.form.get('directory')\n        if not directory:\n            return jsonify({'success': False, 'error': 'No directory specified'}), 400\n\n        directory_path = os.path.join(app.config['FILE_TREE_ROOT'], directory)\n        directory_path = os.path.normpath(directory_path)\n        if not directory_path.startswith(app.config['FILE_TREE_ROOT']):\n            return jsonify({'success': False, 'error': 'Invalid directory path'}), 400\n        \n        if not os.path.exists(directory_path):\n            return jsonify({'success': False, 'error': 'Directory does not exist'}), 400\n\n        file_path = os.path.join(directory_path, logo.filename)\n        file_path = os.path.normpath(file_path)\n        if not file_path.startswith(directory_path):\n            return 
jsonify({'success': False, 'error': 'Invalid file path'}), 400\n        logo.save(file_path)\n        return jsonify({'success': True, 'message': 'Logo added successfully'}), 200\n\n    return jsonify({'success': False, 'error': 'Invalid file type'}), 400\n\n\n@app.route('/del-logo', methods=['POST'])\ndef del_logo():\n    directory = request.form.get('directory')\n    filename = request.form.get('filename')\n\n    if not directory or not filename:\n        return jsonify({'success': False, 'error': 'Directory and filename must be specified'}), 400\n\n    directory_path = os.path.join(app.config['FILE_TREE_ROOT'], directory)\n    directory_path = os.path.normpath(directory_path)\n    if not directory_path.startswith(app.config['FILE_TREE_ROOT']):\n        return jsonify({'success': False, 'error': 'Invalid directory path'}), 400\n    file_path = os.path.join(directory_path, filename)\n    file_path = os.path.normpath(file_path)\n    if not file_path.startswith(directory_path):\n        return jsonify({'success': False, 'error': 'Invalid file path'}), 400\n\n    if not os.path.exists(file_path):\n        return jsonify({'success': False, 'error': 'File does not exist'}), 400\n\n    try:\n        os.remove(file_path)\n        return jsonify({'success': True, 'message': 'Logo deleted successfully'}), 200\n    except Exception:\n        return jsonify({'success': False}), 500\n\n\n@app.route('/add-brand', methods=['POST'])\ndef add_brand():\n    brand_name = request.form.get('brandName')\n    brand_domain = request.form.get('brandDomain')\n\n    if not brand_name or not brand_domain:\n        return jsonify({'success': False, 'error': 'Brand name and domain must be specified'}), 400\n\n    # 创建品牌目录\n    brand_directory_path = os.path.join(app.config['FILE_TREE_ROOT'], brand_name)\n    brand_directory_path = os.path.normpath(brand_directory_path)\n    if not brand_directory_path.startswith(app.config['FILE_TREE_ROOT']):\n        return jsonify({'success': False, 
'error': 'Invalid brand directory path'}), 400\n    \n    if os.path.exists(brand_directory_path):\n        return jsonify({'success': False, 'error': 'Brand already exists'}), 400\n\n    try:\n        os.makedirs(brand_directory_path)\n        domain_map_add(brand_name, brand_domain, app.config['DOMAIN_MAP_PATH'])\n        return jsonify({'success': True, 'message': 'Brand added successfully'}), 200\n    except Exception:\n        return jsonify({'success': False}), 500\n\n\n@app.route('/del-brand', methods=['POST'])\ndef del_brand():\n    directory = request.json.get('directory')\n\n    if not directory:\n        return jsonify({'success': False, 'error': 'Directory must be specified'}), 400\n\n    directory_path = os.path.join(app.config['FILE_TREE_ROOT'], directory)\n    directory_path = os.path.normpath(directory_path)\n    if not directory_path.startswith(app.config['FILE_TREE_ROOT']):\n        return jsonify({'success': False, 'error': 'Invalid directory path'}), 400\n\n    if not os.path.exists(directory_path):\n        return jsonify({'success': False, 'error': 'Directory does not exist'}), 400\n\n    try:\n        shutil.rmtree(directory_path)\n        domain_map_delete(directory, app.config['DOMAIN_MAP_PATH'])\n        return jsonify({'success': True, 'message': 'Brand deleted successfully'}), 200\n    except Exception:\n        return jsonify({'success': False}), 500\n\n\n@app.route('/reload-model', methods=['POST'])\ndef reload_model():\n    global phishpedia_cls\n    try:\n        load_config(reload_targetlist=True)\n        # Reinitialize Phishpedia\n        phishpedia_cls = PhishpediaWrapper()\n        return jsonify({'success': True, 'message': 'Brand deleted successfully'}), 200\n    except Exception:\n        return jsonify({'success': False}), 500\n\n\nif __name__ == \"__main__\":\n    ip_address = '0.0.0.0'\n    port = 5000\n    while check_port_inuse(port, ip_address):\n        port = port + 1\n\n    # 加载核心检测逻辑\n    phishpedia_cls = 
PhishpediaWrapper()\n\n    initial_upload_folder(app.config['UPLOAD_FOLDER'])\n    \n    app.run(host=ip_address, port=port)\n"
  },
  {
    "path": "WEBtool/readme.md",
    "content": "# Phishpedia Web Tool\n\nThis is a web tool for Phishpedia that provides a user-friendly interface with brand and domain management capabilities, as well as visualization features for phishing detection.\n\n## How to Run\n\nRun the following command in the web tool directory:\n\n```bash\npixi run python WEBtool/phishpedia_web.py\n```\n\nYou should see a URL once the server has started (http://127.0.0.1:500x; the server uses port 5000 or, if that is taken, the next free port above it). Visit it in your browser.\n\n## User Guide\n\n### 1. Main Page (For phishing detection)\n\n![image-20241228141453032](./mainpage.png)\n\n1. **URL Detection**\n   - Enter the URL to be tested in the \"Enter URL\" input box\n   - Click the \"Upload Image\" button to select the corresponding website screenshot\n   - Click the \"Start Detection!\" button to start detection\n   - Detection results will be displayed below, including text results and visual presentation\n2. **Result Display**\n   - The original image with the extracted logo annotated will be displayed in the \"Logo Extraction\" box\n   - Detection results will be displayed in the \"Detection Result\" box, together with a synthetic explanation\n   - You can clearly see the detected brand identifiers and related information\n\n### 2. Sidebar (For database management)\n\nClick the sidebar button \"☰\" at the top right corner to open a sidebar showing the backend database.\n\n![image-20241228141419609](./sidebar.png)\n\n1. **Brand Management**\n   - Click \"Add Brand\" to add a new brand\n   - Enter the brand name and its corresponding domains in the form\n   - Click a brand to select it, then click \"Delete Brand\" to remove the selected brand\n   - Double-click a brand to see the logos under that brand\n2. **Logo Management**\n   - Click a brand to select it, then click \"Add Logo\" to add a logo to that brand\n   - Click a logo to select it, then click \"Delete Logo\" to remove the selected logo\n3. 
**Data Update**\n   - After making changes, click the \"Reload Model\" button\n   - The system will reload the updated dataset\n\n## Main Features\n\n1. **Phishing Detection**\n\n   - URL input and detection\n   - Screenshot upload and analysis\n   - Detection result visualization\n\n2. **Brand Management**\n   - Add/Delete brands\n   - Add/Delete brand logos\n   - Domain management\n   - Model reloading\n\n## Directory Structure\n\n```\nWEBtool/\n├── static/             # Static resources such as CSS and icons\n├── templates/          # Web page templates\n├── phishpedia_web.py   # The Flask server\n├── utils_web.py        # Helper functions for the server\n├── readme.md           # Documentation\n└── requirements.txt    # Dependency list\n```\n"
  },
  {
    "path": "WEBtool/static/css/sidebar.css",
    "content": "/* 侧边栏样式 */\n.sidebar {\n    position: fixed;\n    top: 0;\n    right: -400px;\n    width: 300px;\n    height: 100%;\n    background-color: #ffffff;\n    box-shadow: -2px 0 5px rgba(0, 0, 0, 0.1);\n    transition: right 0.3s ease;\n    z-index: 1000;\n    display: flex;\n    flex-direction: column;\n    padding: 20px;\n}\n\n/* 侧边栏打开时显示 */\n.sidebar.open {\n    right: 0;\n}\n\n/* 侧边栏标题 */\n.sidebar-header {\n    display: flex;\n    justify-content: space-between;\n    align-items: center;\n    font-size: 18px;\n    font-weight: bold;\n    margin-bottom: 20px;\n}\n\n/* 关闭按钮 */\n.close-sidebar {\n    background: none;\n    border: none;\n    font-size: 18px;\n    cursor: pointer;\n    color: #333;\n}\n\n/* 右上角按钮样式 */\n.sidebar-toggle {\n    position: absolute;\n    top: 15px;\n    right: 15px;\n    background: #87CEFA;\n    color: white;\n    border: none;\n    border-radius: 5px;\n    padding: 10px 15px;\n    font-size: 18px;\n    font-weight: bold;\n    cursor: pointer;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n    transition: background-color 0.3s ease;\n}\n\n.sidebar-toggle:hover {\n    background-color: #0056b3;\n}\n\n/* 按钮容器样式 */\n.sidebar-buttons {\n    display: flex;\n    flex-wrap: wrap;\n    gap: 10px;\n    margin-bottom: 20px;\n    justify-content: space-between;\n}\n\n/* 按钮基础样式 */\n.sidebar-button {\n    flex: 1 1 calc(50% - 10px);\n    display: flex;\n    justify-content: center;\n    align-items: center;\n    background-color: #87CEFA;\n    color: white;\n    font-size: 14px;\n    font-weight: bold;\n    border: none;\n    border-radius: 3px;\n    padding: 5px 10px;\n    cursor: pointer;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n    transition: background-color 0.3s ease, transform 0.2s ease;\n}\n\n/* 按钮悬停效果 */\n.sidebar-button:hover {\n    background-color: #0056b3;\n    transform: translateY(-2px);\n}\n\n/* 按钮点击效果 */\n.sidebar-button:active {\n    background-color: #003d80;\n    transform: translateY(0);\n}\n\n/* 
============ 文件树 ============ */\n/* 文件树样式 */\n#file-tree-root {\n    list-style-type: none;\n    padding-left: 20px;\n    height: 580px;\n    max-height: 580px;\n    overflow-y: auto;\n    border: 1px solid #ccc;\n    background-color: white;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n}\n\n.file-item {\n    margin-bottom: 5px;\n}\n\n.file-folder {\n    cursor: pointer;\n}\n\n.folder-name {\n    display: flex;\n    align-items: center;\n}\n\n.folder-icon {\n    margin-right: 5px;\n}\n\n.file-file {\n    cursor: pointer;\n}\n\n.file-icon {\n    margin-right: 5px;\n}\n\n.hidden {\n    display: none;\n}\n\n\n.file-folder>ul {\n    padding-left: 20px;\n}\n\n/* 预览框样式 */\n#image-preview-box {\n    position: absolute;\n    background-color: white;\n    border: 1px solid #ccc;\n    padding: 10px;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n    max-width: 400px;\n    max-height: 300px;\n    overflow: hidden;\n}\n\n/* 选中样式 */\n.selected {\n    border: 2px solid #007bff;\n    padding: 2px;\n    box-sizing: border-box;\n}\n\n\n/* ============== 表单 ============= */\n.form-container {\n    position: fixed;\n    top: 50%;\n    left: 50%;\n    transform: translate(-50%, -50%);\n    background-color: #ffffff;\n    padding: 20px 30px;\n    border-radius: 10px;\n    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);\n    width: 300px;\n    max-width: 90%;\n    z-index: 1001;\n}\n\n/* 表单标题 */\n.form-container h3 {\n    font-size: 22px;\n    font-weight: bold;\n    color: #333;\n    margin-bottom: 20px;\n    text-align: center;\n    font-family: 'Arial', sans-serif;\n}\n\ninput[type=\"label\"] {\n    width: 20%;\n}\n\n/* 输入框样式 */\ninput[type=\"text\"] {\n    width: 90%;\n    padding: 12px;\n    margin: 12px 0;\n    border: 1px solid #ddd;\n    border-radius: 8px;\n    background-color: #f9f9f9;\n    font-size: 16px;\n    color: #333;\n    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);\n    transition: border-color 0.3s ease, background-color 0.3s ease;\n    text-align: 
center;\n}\n\n/* 输入框聚焦效果 */\ninput[type=\"text\"]:focus {\n    border-color: #3498db;\n    background-color: #fff;\n    outline: none;\n}\n\n/* 提交按钮样式 */\nbutton[type=\"submit\"] {\n    background-color: #3498db;\n    color: white;\n}\n\n/* 取消按钮样式 */\nbutton[type=\"button\"] {\n    background-color: #7c7c7c;\n    color: white;\n}\n\n/* 表单按钮容器 */\n.form-actions {\n    width: 100%;\n    display: flex;\n    justify-content: space-between;\n    gap: 12px;\n    margin-top: 20px;\n}\n\n/* 提交按钮样式 */\nbutton[type=\"submit\"] {\n    background-color: #3498db;\n    color: white;\n    padding: 10px 20px;\n    border: none;\n    border-radius: 5px;\n    font-size: 14px;\n    cursor: pointer;\n    transition: background-color 0.3s ease, transform 0.2s ease;\n}\n\n/* 提交按钮悬停效果 */\nbutton[type=\"submit\"]:hover {\n    background-color: #2980b9;\n    transform: translateY(-2px);\n}\n\n/* 提交按钮点击效果 */\nbutton[type=\"submit\"]:active {\n    background-color: #1abc9c;\n    transform: translateY(0);\n}\n\n/* 取消按钮样式 */\nbutton[type=\"button\"] {\n    background-color: #7c7c7c;\n    color: white;\n    padding: 10px 20px;\n    border: none;\n    border-radius: 5px;\n    font-size: 14px;\n    cursor: pointer;\n    transition: background-color 0.3s ease, transform 0.2s ease;\n}\n\n/* 取消按钮悬停效果 */\nbutton[type=\"button\"]:hover {\n    background-color: #555;\n    transform: translateY(-2px);\n}\n\n/* 取消按钮点击效果 */\nbutton[type=\"button\"]:active {\n    background-color: #333;\n    transform: translateY(0);\n}\n\n/* 浮层样式 */\n#overlay {\n    position: fixed;\n    top: 0;\n    left: 0;\n    width: 100%;\n    height: 100%;\n    background-color: rgba(0, 0, 0, 0.5);\n    display: flex;\n    justify-content: center;\n    align-items: center;\n    z-index: 1002;\n}\n\n/* 转圈动画样式 */\n#spinner {\n    border: 2px solid #f3f3f3;\n    border-top: 2px solid #3498db;\n    border-radius: 50%;\n    width: 16px;\n    height: 16px;\n    animation: spin 2s linear infinite;\n    margin-right: 10px;\n}\n\n/* 转圈动画 
*/\n@keyframes spin {\n    0% {\n        transform: rotate(0deg);\n    }\n\n    100% {\n        transform: rotate(360deg);\n    }\n}\n\n/* 浮层中的文本样式 */\n#overlay p {\n    color: white;\n    font-size: 16px;\n    font-weight: bold;\n    text-align: center;\n    line-height: 16px;\n    margin: 0;\n}\n\n#overlay .spinner-container {\n    display: flex;\n    align-items: center;\n}"
  },
  {
    "path": "WEBtool/static/css/style.css",
    "content": "body,\nhtml {\n    margin: 0;\n    padding: 0;\n    font-family: Arial, sans-serif;\n    background-color: #faf4f2;\n}\n\nul {\n    list-style-type: none;\n    padding: 0;\n}\n\nli {\n    margin: 5px 0;\n}\n\n#header {\n    display: flex;\n    align-items: center;\n    justify-content: flex-start;\n    position: absolute;\n    top: 0px;\n    left: 0px;\n    background-color: rgba(255, 255, 255, 0.8);\n    padding: 10px 10px;\n    border-radius: 5px;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n    width: 100%;\n    margin-bottom: 10px;\n}\n\n#logo-icon {\n    height: 60px;\n    width: auto;\n    margin-right: 20px;\n}\n\n#logo-text {\n    display: flex;\n    align-items: center;\n    height: 80px;\n    line-height: 80px;\n    letter-spacing: 2px;\n    background: linear-gradient(90deg, #3498db, #f9f388);\n    -webkit-background-clip: text;\n    background-clip: text;\n    -webkit-text-fill-color: transparent;\n    text-shadow: 1px 1px 3px rgba(0, 0, 0, 0.2);\n    font-size: 35px;\n    font-weight: bold;\n}\n\n\n#main-container {\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    width: 100%;\n    margin-top: 130px;\n}\n\n#input-container {\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    width: 1200px;\n    padding: 20px;\n    border-radius: 8px;\n    border: 1px solid #ddd;\n    background-color: #dff0fb;\n}\n\n.inner-container {\n    width: 100%;\n    height: 100%;\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    border-radius: 5px;\n    border: 3px dashed white;\n    background-color: #eaf4fb;\n    padding-top: 20px;\n    padding-bottom: 20px;\n}\n\n#output-container {\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    width: 1240px;\n    margin-top: 10px;\n}\n\n/* ============================= URL输入区域 =============================*/\n#url-input-container {\n    display: flex;\n    justify-content: center;\n    align-items: 
center;\n    gap: 10px;\n    width: 500px;\n}\n\n.custom-label {\n    background-color: #87CEFA;\n    color: white;\n    border-radius: 25px;\n    padding: 10px 20px;\n    font-size: 16px;\n    font-weight: bold;\n    border: none;\n    text-align: center;\n    white-space: nowrap;\n}\n\n#url-input {\n    background-color: #dcdcdc;\n    color: #333;\n    border: none;\n    border-radius: 15px;\n    padding: 10px 20px;\n    font-size: 16px;\n    outline: none;\n    width: 300px;\n    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);\n}\n\n#url-input::placeholder {\n    color: #888;\n    font-style: italic;\n}\n\n/* ============================= 图片上传区域 =============================*/\n#image-upload-container {\n    display: flex;\n    justify-content: center;\n    align-items: center;\n    width: 410px;\n}\n\n.drop-area {\n    border: 2px dashed #007BFF;\n    border-radius: 8px;\n    background-color: #ffffff;\n    padding: 20px;\n    text-align: center;\n    font-size: 1.2em;\n    color: #004085;\n    margin-top: 10px;\n    width: 100%;\n    height: 20vh;\n    margin: 20px auto;\n    transition: background-color 0.3s ease;\n}\n\n\n.upload-icon {\n    width: 50px;\n    height: 50px;\n    margin-bottom: 10px;\n}\n\n.upload-label {\n    cursor: pointer;\n    margin-bottom: -10px;\n    background-color: white;\n    color: black;\n    padding: 10px 20px;\n    border: 2px solid #ccc;\n    border-radius: 50%;\n    border-radius: 6px;\n    text-align: center;\n    font-size: small;\n    display: inline-block;\n    line-height: 1;\n    font-family: Arial,\n        sans-serif;\n}\n\n.upload-label:hover {\n    background-color: #f0f0f0;\n}\n\n.upload-success-area {\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    justify-content: center;\n    padding: 20px;\n    border: 2px dashed #007BFF;\n    border-radius: 8px;\n    background-color: #ffffff;\n    margin-top: 10px;\n    margin-bottom: 10px;\n}\n\n.success-message {\n    display: flex;\n    
align-items: center;\n    margin-bottom: 10px;\n    font-size: larger;\n}\n\n.success-icon {\n    width: 30px;\n    height: 30px;\n    margin-right: 5px;\n}\n\n.success-text {\n    font-size: 16px;\n}\n\n.uploaded-thumbnail {\n    width: 400px;\n    height: auto;\n    margin-top: 10px;\n    margin-bottom: 10px;\n}\n\n.clear-button {\n    padding: 10px 20px;\n    background-color: #888888;\n    color: white;\n    border: none;\n    border-radius: 8px;\n    font-size: 16px;\n    font-weight: bold;\n    cursor: pointer;\n    transition: background-color 0.3s ease;\n    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);\n}\n\n.clear-button:hover {\n    background-color: #555555;\n}\n\n#start-detection-button {\n    background-color: #007BFF;\n    color: white;\n    border: none;\n    border-radius: 25px;\n    padding: 10px 20px;\n    font-size: 16px;\n    font-weight: bold;\n    cursor: pointer;\n    margin-top: 0px;\n    width: 410px;\n    transition: background-color 0.3s ease;\n}\n\n#start-detection-button:hover {\n    background-color: #0056b3;\n}\n\n/* ============================= 结果容器样式 =============================*/\n#result-container {\n    display: flex;\n    flex-direction: row;\n    justify-content: space-between;\n    align-items: flex-start;\n    width: 100%;\n    max-width: 1500px;\n    gap: 20px;\n}\n\n#original-image-container,\n#detection-result-container {\n    display: flex;\n    flex-direction: column;\n    align-items: center;\n    width: 50%;\n    height: 450px;\n    border: 1px solid #ddd;\n    border-radius: 10px;\n    padding-top: 10px;\n    padding-left: 20px;\n    padding-right: 20px;\n    padding-bottom: 20px;\n    background-color: #ffffff;\n    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);\n    transition: transform 0.3s ease;\n}\n\n#original-image-container:hover,\n#detection-result-container:hover {\n    transform: scale(1.02);\n    transition: transform 0.3s ease;\n}\n\n.result_title {\n    width: 100%;\n    height: 20px;\n    margin-top: 0px;\n   
 text-align: center;\n    padding: 10px;\n    border-radius: 8px;\n    font-family: Arial,\n        sans-serif;\n    font-weight: bold;\n    font-size: 18px;\n}\n\n#logo-extraction-result {\n    width: 100%;\n    height: 100%;\n    display: flex;\n    justify-content: center;\n    align-items: center;\n    overflow: hidden;\n    margin-top: 10px;\n    background-color: #f9f9f9;\n    border: 1px solid #ddd;\n    border-radius: 8px;\n}\n\n#original-image {\n    max-height: 100%;\n    max-width: 100%;\n    object-fit: contain;\n}\n\n#detection-result {\n    width: 100%;\n    height: 100%;\n    margin-top: 10px;\n    text-align: left;\n    padding: 10px;\n    background-color: #f9f9f9;\n    border: 1px solid #ddd;\n    border-radius: 8px;\n}\n\n#detection-label {\n    display: inline-block;\n    font-family: Arial, sans-serif;\n    font-size: 14px;\n    font-weight: bold;\n    color: white;\n    padding: 3px 6px;\n    border-radius: 16px;\n    text-align: center;\n    transition: transform 0.2s, box-shadow 0.2s;\n}\n\n#detection-label.benign {\n    background: linear-gradient(90deg, #4CAF50, #4CAF50);\n}\n\n#detection-label.phishing {\n    background: linear-gradient(90deg, #F44336, #F44336);\n}\n\n#detection-label.unknown {\n    background: linear-gradient(90deg, #9E9E9E, #9E9E9E);\n}\n\n#detection-explanation {\n    font-size: 14px;\n    color: #333;\n}\n\n.separator {\n    width: 100%;\n    height: 2px;\n    background-color: #ddd;\n    margin: 10px 0;\n}\n\n\n.tasks-list {\n    list-style: none;\n    padding: 0;\n    margin: 0;\n}\n\n.tasks-list li {\n    display: flex;\n    align-items: center;\n    justify-content: flex-start;\n    padding: 8px 0;\n    border-bottom: 1px solid #eee;\n}\n\n.tasks-list li:last-child {\n    border-bottom: none;\n}\n\n.icon {\n    margin-right: 8px;\n    font-size: 16px;\n}\n\n.task {\n    font-size: 14px;\n    color: #555;\n    margin-right: 12px;\n}\n\n.result {\n    font-size: 14px;\n    color: #5b5b5b;\n    background-color: 
#cdcdcd;\n    padding: 3px 6px;\n    border-radius: 10px;\n}\n\n#detection-explanation {\n    font-family: Arial, sans-serif;\n    font-size: 14px;\n    line-height: 1.8;\n    color: #333;\n    background-color: #f9f9f9;\n    padding: 16px;\n    border-left: 4px solid #0078d4;\n    border-radius: 8px;\n    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);\n    margin: 16px 0;\n}\n\n#detection-explanation p {\n    margin: 0;\n}\n\n#detection-explanation strong {\n    color: #d9534f;\n    font-weight: bold;\n    background-color: #fff0f0;\n    padding: 2px 4px;\n    border-radius: 4px;\n}"
  },
  {
    "path": "WEBtool/static/js/main.js",
    "content": "new Vue({\n    el: '#main-container',\n    data() {\n        return {\n            url: '',\n            result: null,\n            uploadedImage: null,\n            imageUrl: '',\n            uploadSuccess: false,\n        }\n    },\n    methods: {\n        startDetection() {\n            if (!this.url) {\n                alert('Please enter a valid URL.');\n                return;\n            }\n\n            // 发送 POST 请求到 /detect 路由\n            fetch('/detect', {\n                method: 'POST',\n                headers: {\n                    'Content-Type': 'application/json'\n                },\n                body: JSON.stringify({\n                    url: this.url,\n                    imageUrl: this.imageUrl\n                })\n            })\n                .then(response => response.json())\n                .then(data => {\n                    this.result = data;  // Update all data\n\n                    if (data.logo_extraction) { // Logo Extraction Result\n                        document.getElementById('original-image').src = `data:image/png;base64,${data.logo_extraction}`;\n                    }\n\n                    // Detectoin Result\n                    const labelElement = document.getElementById('detection-label');\n                    const explanationElement = document.getElementById('detection-explanation');\n                    const matched_brand_element = document.getElementById('matched-brand');\n                    const siamese_conf_element = document.getElementById('siamese-conf');\n                    const correct_domain_element = document.getElementById('correct-domain');\n                    const detection_time_element = document.getElementById('detection-time');\n\n                    detection_time_element.textContent = data.detection_time + ' s';\n                    if (data.result === 'Benign') {\n                        labelElement.className = 'benign';\n                        
labelElement.textContent = 'Benign';\n                        matched_brand_element.textContent = data.matched_brand;\n                        siamese_conf_element.textContent = data.confidence;\n                        correct_domain_element.textContent = data.correct_domain;\n                        explanationElement.innerHTML = `\n                            <p>This website has been analyzed and determined to be <strong>${labelElement.textContent.toLowerCase()}</strong>. \n                            Because we have matched a brand <strong>${data.matched_brand}</strong> with confidence <strong>${Math.round(data.confidence * 100, 3)}, </strong>\n                            and the domain extracted from url is within the domain list under the brand (which is <strong>[${data.correct_domain}]</strong>). \n                            Enjoy your surfing!</p>\n                        `;\n                    } else if (data.result === 'Phishing') {\n                        labelElement.className = 'phishing';\n                        labelElement.textContent = 'Phishing';\n                        matched_brand_element.textContent = data.matched_brand;\n                        siamese_conf_element.textContent = data.confidence;\n                        correct_domain_element.textContent = data.correct_domain;\n                        explanationElement.innerHTML = `\n                            <p>This website has been analyzed and determined to be <strong>${labelElement.textContent.toLowerCase()}</strong>. \n                            Because we have matched a brand <strong>${data.matched_brand}</strong> with confidence <strong>${Math.round(data.confidence * 100, 3)}%</strong>, \n                            but the domain extracted from url is NOT within the domain list under the brand (which is <strong>[${data.correct_domain}]</strong>). 
\n                            Please proceed with caution!</p>\n                        `;\n                    } else {\n                        labelElement.className = 'unknown';\n                        labelElement.textContent = 'Unknown';\n                        matched_brand_element.textContent = \"unknown\";\n                        siamese_conf_element.textContent = \"0.00\";\n                        correct_domain_element.textContent = \"unknown\";\n                        explanationElement.innerHTML = `\n                            <p>Sorry, we don't find any matched brand in database so this website is determined to be <strong>${labelElement.textContent.toLowerCase()}</strong>.</p>\n                            <p>It is still possible that this is a <strong>phishing</strong> site. Please proceed with caution!</p>\n                        `;\n                    }\n                })\n                .catch(error => {\n                    console.error('Error:', error);\n                    alert('检测失败，请稍后重试。');\n                });\n        },\n        handleImageUpload(event) {  // 处理图片上传事件\n            const file = event.target.files[0];\n            if (file) {\n                this.uploadedImage = file;\n                this.uploadImage();\n            }\n        },\n        uploadImage() {  // 上传图片到服务器\n            const formData = new FormData();\n            formData.append('image', this.uploadedImage);\n\n            fetch('/upload', {  // 假设上传图片的路由是 /upload\n                method: 'POST',\n                body: formData\n            })\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        this.imageUrl = data.imageUrl;  // 更新图片URL\n                        this.uploadSuccess = true;  // 标记上传成功\n                    } else {\n                        alert('上传图片失败: ' + data.error);\n                    }\n                })\n                
.catch(error => {\n                    console.error('Error:', error);\n                    alert('上传图片失败，请稍后重试。');\n                });\n        },\n        clearUpload() {  // 清除上传的图像\n            fetch('/clear_upload', {  // 假设删除图片的路由是 /delete-image\n                method: 'POST',\n                headers: {\n                    'Content-Type': 'application/json'\n                },\n                body: JSON.stringify({ imageUrl: this.imageUrl })\n            })\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        this.imageUrl = '';\n                        this.uploadSuccess = false;  // 重置上传状态\n                    } else {\n                        alert('删除图片失败: ' + data.error);\n                    }\n                })\n                .catch(error => {\n                    console.error('Error:', error);\n                    alert('删除图片失败，请稍后重试。');\n                });\n        }\n    }\n});\n"
  },
  {
    "path": "WEBtool/static/js/sidebar.js",
    "content": "// sidebar.js\nnew Vue({\n    el: '#sidebar',\n    data() {\n        return {\n            selectedDirectory: null, // 记录当前选中的目录\n            selectedFile: null,      // 记录当前选中的文件\n            selectedDirectoryName: '',\n            selectedFileName: '',\n            showAddBrandForm: false, // 控制表单显示与隐藏\n            brandName: '',           // 品牌名称\n            brandDomain: '',         // 品牌域名\n        }\n    },\n    mounted() {\n        // 网页加载时调用 fetchFileTree 函数\n        this.fetchFileTree();\n        document.getElementById('logo-file-input').addEventListener('change', this.handleLogoFileSelect);\n\n        const sidebar = document.getElementById(\"sidebar\");\n        const sidebarToggle = document.getElementById(\"sidebar-toggle\");\n        const closeSidebar = document.getElementById(\"close-sidebar\");\n\n        // 点击打开侧边栏\n        sidebarToggle.addEventListener(\"click\", () => {\n            sidebar.classList.add(\"open\");\n        });\n\n        // 点击关闭侧边栏\n        closeSidebar.addEventListener(\"click\", () => {\n            sidebar.classList.remove(\"open\");\n            this.clearSelected();\n        });\n\n        // 点击侧边栏外部关闭\n        document.addEventListener(\"click\", (event) => {\n            if (!sidebar.contains(event.target) && !sidebarToggle.contains(event.target)) {\n                sidebar.classList.remove(\"open\");\n                this.clearSelected();\n            }\n        });\n    },\n    methods: {\n        // 递归渲染文件树\n        renderFileTree(directory, parentPath = '') {\n            // 获取文件树容器\n            const fileTreeRoot = document.getElementById('file-tree-root');\n            fileTreeRoot.innerHTML = ''; // 清空现有内容\n\n            // 递归生成文件树节点\n            const createFileTreeNode = (item, parentPath) => {\n                const li = document.createElement('li');\n                li.classList.add('file-item');\n\n                const currentPath = parentPath ? 
`${parentPath}/${item.name}` : item.name;\n\n                if (item.type === 'directory') {\n                    li.classList.add('file-folder');\n\n                    const folderNameContainer = document.createElement('div');\n                    folderNameContainer.classList.add('folder-name');\n                    folderNameContainer.innerHTML = `<i class=\"folder-icon\">📁</i><span>${item.name}</span>`;\n                    li.appendChild(folderNameContainer);\n\n                    if (item.children) {\n                        const ul = document.createElement('ul');\n                        ul.classList.add('hidden'); // 默认隐藏子目录\n                        item.children.forEach((child) => {\n                            ul.appendChild(createFileTreeNode(child, currentPath)); // 传递当前目录的路径\n                        });\n                        li.appendChild(ul);\n\n                        // 单击选中目录\n                        folderNameContainer.addEventListener('click', (e) => {\n                            e.stopPropagation();\n                            this.selectDirectory(e, item.name);\n                        });\n\n                        // 双击展开/隐藏目录\n                        folderNameContainer.addEventListener('dblclick', (e) => {\n                            e.stopPropagation();\n                            ul.classList.toggle('hidden');\n                        });\n                    }\n                } else {\n                    li.classList.add('file-file');\n                    li.innerHTML = `<i class=\"file-icon\">📄</i><span>${item.name}</span>`;\n\n                    // 单击选中文件\n                    li.addEventListener('click', (event) => {\n                        this.selectFile(event, item.name, parentPath);\n                    });\n                }\n\n                return li;\n            };\n\n            // 遍历顶层文件和目录\n            directory.forEach((item) => {\n                fileTreeRoot.appendChild(createFileTreeNode(item, 
parentPath));\n            });\n        },\n        // 获取文件树数据\n        fetchFileTree() {\n            // 发送请求获取文件树数据\n            fetch('/get-directory') // 后端文件树接口\n                .then((response) => response.json())\n                .then((data) => {\n                    if (data.file_tree) {\n                        this.fileTree = data.file_tree; // 存储文件树数据\n                        this.renderFileTree(this.fileTree); // 渲染文件树\n                    } else {\n                        console.error('Invalid file tree data');\n                        alert('文件树加载失败');\n                    }\n                })\n                .catch((error) => {\n                    console.error('Error fetching file tree:', error);\n                    alert('无法加载文件树，请稍后重试。');\n                });\n        },\n\n        // 选中目录\n        selectDirectory(event, directoryName) {\n            const folderNameContainer = event.currentTarget;\n\n            if (this.selectedDirectory) {\n                this.selectedDirectory.classList.remove('selected');\n            }\n            if (this.selectedFile) {\n                this.selectedFile.classList.remove('selected');\n            }\n\n            // 设置当前选中的目录\n            this.selectedDirectory = folderNameContainer;\n            this.selectedDirectoryName = directoryName;\n            folderNameContainer.classList.add('selected');\n            this.selectedFile = null;\n            this.selectedFileName = '';\n        },\n\n        // 选中文件\n        selectFile(event, fileName, parentPath) {\n            const fileElement = event.currentTarget;\n\n            if (this.selectedDirectory) {\n                this.selectedDirectory.classList.remove('selected');\n            }\n            if (this.selectedFile) {\n                this.selectedFile.classList.remove('selected');\n            }\n\n            // 设置当前选中的文件\n            this.selectedFile = fileElement;\n            this.selectedFileName = fileName;\n            
fileElement.classList.add('selected');\n            this.selectedDirectory = null;\n            this.selectedDirectoryName = parentPath;\n        },\n\n        // 增加品牌\n        addBrand() {\n            this.showAddBrandForm = true;\n        },\n\n        // 关闭添加品牌的表单\n        closeAddBrandForm() {\n            this.showAddBrandForm = false;\n            this.brandName = '';\n            this.brandDomain = '';\n        },\n\n        // 提交添加品牌的表单\n        submitAddBrandForm() {\n            if (!this.brandName || !this.brandDomain) {\n                alert('Please fill in all fields.');\n                closeAddBrandForm()\n                return;\n            }\n\n            const formData = new FormData();\n            formData.append('brandName', this.brandName);\n            formData.append('brandDomain', this.brandDomain);\n\n            fetch('/add-brand', {\n                method: 'POST',\n                body: formData\n            })\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        alert('Brand added successfully.');\n                        this.fetchFileTree();\n                        this.closeAddBrandForm();\n                    } else {\n                        alert('Failed to add brand: ' + data.error);\n                    }\n                })\n                .catch(error => {\n                    console.error('Error:', error);\n                    alert('Failed to add brand, please try again.');\n                });\n        },\n\n        // 删除品牌\n        delBrand() {\n            if (this.selectedDirectory == null) {\n                alert('Please select a brand first.');\n                return;\n            }\n            const formData = new FormData();\n            formData.append('directory', this.selectedDirectoryName);\n\n            fetch('/del-brand', {\n                method: 'POST',\n                headers: {\n                   
 'Content-Type': 'application/json'\n                },\n                body: JSON.stringify({\n                    directory: this.selectedDirectoryName\n                })\n            })\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        alert('Brand deletedsuccessfully.');\n                        this.fetchFileTree();\n                    }\n                })\n        },\n\n        // 增加logo\n        addLogo() {\n            console.log('addLogo');\n            if (this.selectedDirectory == null) {\n                alert('Please select a brand first.');\n                return;\n            }\n            document.getElementById('logo-file-input').click();\n        },\n\n        handleLogoFileSelect(event) {\n            const file = event.target.files[0];\n            if (file) {\n                const formData = new FormData();\n                formData.append('logo', file);\n                formData.append('directory', this.selectedDirectoryName);\n\n                fetch('/add-logo', {\n                    method: 'POST',\n                    body: formData\n                })\n                    .then(response => response.json())\n                    .then(data => {\n                        if (data.success) {\n                            this.fetchFileTree();\n                        } else {\n                            alert('Failed to add logo: ' + data.error);\n                        }\n                    })\n                    .catch(error => {\n                        console.error('Error:', error);\n                        alert('Failed to add logo, please try again.');\n                    });\n            }\n        },\n\n        // 删除logo\n        delLogo() {\n            if (this.selectedFile == null) {\n                alert('Please select a logo first.');\n                return;\n            }\n\n            const formData = new 
FormData();\n            formData.append('directory', this.selectedDirectoryName);\n            formData.append('filename', this.selectedFileName);\n\n            fetch('/del-logo', {\n                method: 'POST',\n                body: formData\n            })\n                .then(response => response.json())\n                .then(data => {\n                    if (data.success) {\n                        this.fetchFileTree();\n                    } else {\n                        alert('Failed to delete logo: ' + data.error);\n                    }\n                })\n                .catch(error => {\n                    console.error('Error:', error);\n                    alert('Failed to delete logo, please try again.');\n                });\n        },\n\n        async reloadModel() {\n            const overlay = document.getElementById('overlay');\n\n            overlay.style.display = 'flex';\n\n            try {\n                const response = await fetch('/reload-model', {\n                    method: 'POST',\n                    headers: {\n                        'Content-Type': 'application/json'\n                    }\n                });\n                const data = await response.json();\n            } catch (error) {\n                alert('Failed to reload model.');\n            } finally {\n                overlay.style.display = 'none';\n            }\n        },\n\n        clearSelected() {\n            if (this.selectedDirectory) {\n                this.selectedDirectory.classList.remove('selected');\n                this.selectDirectory = null;\n            }\n            if (this.selectedFile) {\n                this.selectedFile.classList.remove('selected');\n                this.selectFile = null;\n            }\n            this.selectedDirectoryName = '';\n            this.selectedFileName = '';\n        },\n    }\n});"
  },
  {
    "path": "WEBtool/templates/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n    <meta charset=\"UTF-8\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    <title>PhishPedia</title>\n    <link rel=\"stylesheet\" href=\"{{ url_for('static', filename='css/style.css') }}\">\n    <link rel=\"stylesheet\" href=\"{{ url_for('static', filename='css/sidebar.css') }}\">\n</head>\n\n<body>\n    <!-- Logo 和图标部分 -->\n    <div id=\"header\">\n        <img src=\"{{ url_for('static', filename='icon/fish.png') }}\" alt=\"Logo\" id=\"logo-icon\">\n        <span id=\"logo-text\">PhishPedia</span>\n        <button id=\"sidebar-toggle\" class=\"sidebar-toggle\">☰</button>\n    </div>\n\n    <div id=\"overlay\" style=\"display: none;\">\n        <div class=\"spinner-container\">\n            <div id=\"spinner\"></div>\n            <p>Reloading model, this may take some time...</p>\n        </div>\n    </div>\n\n    <!-- 侧边栏 -->\n    <div id=\"sidebar\" class=\"sidebar\">\n        <div class=\"sidebar-header\">\n            <span>DATABASE</span>\n            <button id=\"close-sidebar\" class=\"close-sidebar\">✖</button>\n        </div>\n        <div class=\"separator\"></div>\n        <!-- 按钮组 -->\n        <div class=\"sidebar-buttons\">\n            <button class=\"sidebar-button\" @click=\"addBrand\">ADD Brand</button>\n            <button class=\"sidebar-button\" @click=\"delBrand\">DEL Brand</button>\n            <button class=\"sidebar-button\" @click=\"addLogo\">ADD LOGO</button>\n            <button class=\"sidebar-button\" @click=\"delLogo\">DEL LOGO</button>\n            <button class=\"sidebar-button\" @click=\"reloadModel\">Reload Model</button>\n        </div>\n        <input type=\"file\" id=\"logo-file-input\" style=\"display: none;\" accept=\".png,.jpeg,.jpg\">\n        <div class=\"separator\"></div>\n\n        <!-- 文件树容器 -->\n        <div class=\"file-tree\">\n            <ul id=\"file-tree-root\" class=\"file-tree-root\">\n                <!-- 
文件树的内容将由 JavaScript 动态生成 -->\n            </ul>\n        </div>\n\n        <!-- 添加品牌表单 -->\n        <div v-if=\"showAddBrandForm\" id=\"add-brand-form\" class=\"form-container\">\n            <form @submit.prevent=\"submitAddBrandForm\">\n                <h3>Add A New Brand</h3>\n                <div class=\"separator\"></div>\n\n                <label for=\"brandName\">Brand Name</label>\n                <input type=\"text\" id=\"brandName\" v-model=\"brandName\" required>\n\n                <label for=\"brandDomain\">Domain List</label>\n                <input type=\"text\" id=\"brandDomain\" v-model=\"brandDomain\" required>\n\n                <div class=\"form-actions\">\n                    <button type=\"submit\">ADD</button>\n                    <button type=\"button\" @click=\"closeAddBrandForm\">CANCLE</button>\n                </div>\n            </form>\n        </div>\n    </div>\n\n    <!-- 页面居中内容 -->\n    <div id=\"main-container\">\n        <div id=\"input-container\">\n            <div class=\"inner-container\">\n                <!-- URL 输入框 -->\n                <div id=\"url-input-container\">\n                    <label for=\"url-input\" class=\"custom-label\">URL</label>\n                    <input type=\"text\" id=\"url-input\" v-model=\"url\" placeholder=\"Enter URL:\" />\n                </div>\n                <!-- 图片接收区域 -->\n                <div id=\"image-upload-container\">\n                    <div id=\"image-drop-area\" class=\"drop-area\" v-if=\"!uploadSuccess\">\n                        <img src=\"{{ url_for('static', filename='icon/file1.png') }}\" alt=\"Upload Icon\"\n                            class=\"upload-icon\" />\n                        <p></p>\n                        <label for=\"image-upload\" class=\"upload-label\">+ Upload Image</label>\n                        <p style=\"font-size: 14px;\">Or ctrl+v here</p>\n                        <input type=\"file\" id=\"image-upload\" accept=\"image/*\" style=\"display: none;\"\n  
                          @change=\"handleImageUpload\" />\n                    </div>\n                    <div id=\"upload-success-area\" class=\"upload-success-area\" v-if=\"uploadSuccess\">\n                        <div class=\"success-message\">\n                            <img src=\"{{ url_for('static', filename='icon/succ.png') }}\" alt=\"Success Icon\"\n                                class=\"success-icon\" />\n                            <span class=\"success-text\">Uploaded Successfully!</span>\n                        </div>\n                        <img :src=\"imageUrl\" alt=\"Uploaded Image\" class=\"uploaded-thumbnail\" />\n                        <button class=\"clear-button\" @click=\"clearUpload\">clear</button>\n                    </div>\n                </div>\n                <!-- 新增的开始检测按钮 -->\n                <button id=\"start-detection-button\" @click=\"startDetection\">Start Detection !</button>\n            </div>\n        </div>\n        <div id=\"output-container\">\n            <div id=\"result-container\">\n                <div id=\"original-image-container\">\n                    <span class=\"result_title\">Logo Extraction</span>\n                    <div id=\"logo-extraction-result\">\n                        <img id=\"original-image\" src=\"{{ url_for('static', filename='icon/noresult1.png') }}\"\n                            alt=\"Original Webpage Screenshot\" />\n                    </div>\n                </div>\n                <div id=\"detection-result-container\">\n                    <span class=\"result_title\">Detection Result</span>\n                    <div id=\"detection-result\">\n                        <div>\n                            <span class=\"icon\">📊</span>\n                            <span class=\"task\" style=\"font-weight: bold;\">Result</span>\n                            <div id=\"detection-label\"></div>\n                        </div>\n                        <div class=\"separator\"></div>\n       
                 <div>\n                            <ul class=\"tasks-list\">\n                                <li>\n                                    <span class=\"icon\">🏷️</span>\n                                    <span class=\"task\">Matched Brand</span>\n                                    <span class=\"result\" id=\"matched-brand\"></span>\n                                </li>\n                                <li>\n                                    <span class=\"icon\">💬</span>\n                                    <span class=\"task\">Siamese Confidence</span>\n                                    <span class=\"result\" id=\"siamese-conf\"></span>\n                                </li>\n                                <li>\n                                    <span class=\"icon\">🌐</span>\n                                    <span class=\"task\">Correct Domain</span>\n                                    <span class=\"result\" id=\"correct-domain\"></span>\n                                </li>\n                                <li>\n                                    <span class=\"icon\">⏱️</span>\n                                    <span class=\"task\">Detection Time</span>\n                                    <span class=\"result\" id=\"detection-time\"></span>\n                                </li>\n                                <li>\n                                    <div id=\"detection-explanation\"></div>\n                                </li>\n                            </ul>\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <!-- Vue.js 和自定义脚本 -->\n    <script src=\"https://cdn.jsdelivr.net/npm/vue@2\"></script>\n    <script src=\"{{ url_for('static', filename='js/main.js') }}\"></script>\n    <script src=\"{{ url_for('static', filename='js/sidebar.js') }}\"></script>\n</body>\n\n</html>"
  },
  {
    "path": "WEBtool/utils_web.py",
    "content": "# help function for phishpedia web app\nimport os\nimport pickle\nimport shutil\nimport socket\nimport base64\nimport io\nfrom PIL import Image\nimport cv2\n\n\ndef check_port_inuse(port, host):\n    try:\n        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        s.settimeout(1)\n        s.connect((host, port))\n        return True\n    except socket.error:\n        return False\n    finally:\n        if s:\n            s.close()\n\n\ndef allowed_file(filename):\n    return '.' in filename and \\\n           filename.rsplit('.', 1)[1].lower() in {'png', 'jpg', 'jpeg'}\n\n\ndef initial_upload_folder(upload_folder):\n    try:\n        shutil.rmtree(upload_folder)\n    except FileNotFoundError:\n        pass\n    os.makedirs(upload_folder, exist_ok=True)\n    \n    \ndef convert_to_base64(image_array):\n    if image_array is None:\n        return None\n    \n    image_array_rgb = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)\n    img = Image.fromarray(image_array_rgb)\n    buffered = io.BytesIO()\n    img.save(buffered, format=\"PNG\")\n    plotvis_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')\n    return plotvis_base64\n\n\ndef domain_map_add(brand_name, domains_str, domain_map_path):\n    domains = [domain.strip() for domain in domains_str.split(',') if domain.strip()]\n    \n    # Load existing domain mapping\n    with open(domain_map_path, 'rb') as f:\n        domain_map = pickle.load(f)\n    \n    # Add new brand and domains\n    if brand_name in domain_map:\n        if isinstance(domain_map[brand_name], list):\n            # Add new domains, avoid duplicates\n            existing_domains = set(domain_map[brand_name])\n            for domain in domains:\n                if domain not in existing_domains:\n                    domain_map[brand_name].append(domain)\n        else:\n            # If current value is not a list, convert to list\n            old_domain = domain_map[brand_name]\n            
domain_map[brand_name] = [old_domain] + [d for d in domains if d != old_domain]\n    else:\n        domain_map[brand_name] = domains\n    \n    # Save updated mapping\n    with open(domain_map_path, 'wb') as f:\n        pickle.dump(domain_map, f)\n\n        \ndef domain_map_delete(brand_name, domain_map_path):\n    # Load existing domain mapping\n    with open(domain_map_path, 'rb') as f:\n        domain_map = pickle.load(f)\n    \n    print(\"before deleting\", len(domain_map))\n    \n    # Delete brand and its domains\n    if brand_name in domain_map:\n        del domain_map[brand_name]\n    \n    print(\"after deleting\", len(domain_map))\n    \n    # Save updated mapping\n    with open(domain_map_path, 'wb') as f:\n        pickle.dump(domain_map, f)\n"
  },
  {
    "path": "configs.py",
    "content": "# Global configuration\nimport yaml\nfrom logo_matching import cache_reference_list, load_model_weights\nfrom logo_recog import config_rcnn\nimport os\nimport numpy as np\n\n\ndef get_absolute_path(relative_path):\n    base_path = os.path.dirname(__file__)\n    return os.path.abspath(os.path.join(base_path, relative_path))\n\n\ndef load_config(reload_targetlist=False):\n    with open(os.path.join(os.path.dirname(__file__), 'configs.yaml')) as file:\n        configs = yaml.load(file, Loader=yaml.FullLoader)\n\n    # Iterate through the configuration and update paths\n    for section, settings in configs.items():\n        for key, value in settings.items():\n            if 'PATH' in key and isinstance(value, str):  # Check if the key indicates a path\n                absolute_path = get_absolute_path(value)\n                configs[section][key] = absolute_path\n\n    ELE_CFG_PATH = configs['ELE_MODEL']['CFG_PATH']\n    ELE_WEIGHTS_PATH = configs['ELE_MODEL']['WEIGHTS_PATH']\n    ELE_CONFIG_THRE = configs['ELE_MODEL']['DETECT_THRE']\n    ELE_MODEL = config_rcnn(ELE_CFG_PATH,\n                            ELE_WEIGHTS_PATH,\n                            conf_threshold=ELE_CONFIG_THRE)\n\n    # siamese model\n    SIAMESE_THRE = configs['SIAMESE_MODEL']['MATCH_THRE']\n\n    print('Load protected logo list')\n    targetlist_zip_path = configs['SIAMESE_MODEL']['TARGETLIST_PATH']\n    targetlist_dir = os.path.dirname(targetlist_zip_path)\n    zip_file_name = os.path.basename(targetlist_zip_path)\n    targetlist_folder = zip_file_name.split('.zip')[0]\n    full_targetlist_folder_dir = os.path.join(targetlist_dir, targetlist_folder)\n\n    # if reload_targetlist or targetlist_zip_path.endswith('.zip') and not os.path.isdir(full_targetlist_folder_dir):\n    #     os.makedirs(full_targetlist_folder_dir, exist_ok=True)\n    #     subprocess.run(f'unzip -o \"{targetlist_zip_path}\" -d \"{full_targetlist_folder_dir}\"', shell=True)\n\n    SIAMESE_MODEL = 
load_model_weights(num_classes=configs['SIAMESE_MODEL']['NUM_CLASSES'],\n                                       weights_path=configs['SIAMESE_MODEL']['WEIGHTS_PATH'])\n\n    LOGO_FEATS_NAME = 'LOGO_FEATS.npy'\n    LOGO_FILES_NAME = 'LOGO_FILES.npy'\n\n    if reload_targetlist or (not os.path.exists(os.path.join(os.path.dirname(__file__), LOGO_FEATS_NAME))):\n        LOGO_FEATS, LOGO_FILES = cache_reference_list(model=SIAMESE_MODEL,\n                                                      targetlist_path=full_targetlist_folder_dir)\n        print('Finish loading protected logo list')\n        np.save(os.path.join(os.path.dirname(__file__), LOGO_FEATS_NAME), LOGO_FEATS)\n        np.save(os.path.join(os.path.dirname(__file__), LOGO_FILES_NAME), LOGO_FILES)\n\n    else:\n        LOGO_FEATS, LOGO_FILES = np.load(os.path.join(os.path.dirname(__file__), LOGO_FEATS_NAME)), \\\n            np.load(os.path.join(os.path.dirname(__file__), LOGO_FILES_NAME))\n\n    DOMAIN_MAP_PATH = configs['SIAMESE_MODEL']['DOMAIN_MAP_PATH']\n\n    return ELE_MODEL, SIAMESE_THRE, SIAMESE_MODEL, LOGO_FEATS, LOGO_FILES, DOMAIN_MAP_PATH\n"
  },
  {
    "path": "configs.yaml",
    "content": "ELE_MODEL: # element recognition model -- logo only\n  CFG_PATH: models/faster_rcnn.yaml # os.path.join(os.path.dirname(__file__), xxx)\n  WEIGHTS_PATH: models/rcnn_bet365.pth\n  DETECT_THRE: 0.05\n\nSIAMESE_MODEL:\n  NUM_CLASSES: 277 # number of brands, users don't need to modify this even the targetlist is expanded\n  MATCH_THRE: 0.87 # FIXME: threshold is 0.87 in phish-discovery?\n  WEIGHTS_PATH: models/resnetv2_rgb_new.pth.tar\n  TARGETLIST_PATH: models/expand_targetlist.zip\n  DOMAIN_MAP_PATH: models/domain_map.pkl"
  },
  {
    "path": "datasets/test_sites/accounts.g.cdcde.com/html.txt",
    "content": ""
  },
  {
    "path": "datasets/test_sites/accounts.g.cdcde.com/info.txt",
    "content": ""
  },
  {
    "path": "logo_matching.py",
    "content": "from PIL import Image, ImageOps\nfrom torchvision import transforms\nfrom utils import brand_converter, resolution_alignment, l2_norm\nfrom models import KNOWN_MODELS\nimport torch\nimport os\nimport numpy as np\nfrom collections import OrderedDict\nfrom tqdm import tqdm\nfrom tldextract import tldextract\nimport pickle\n\nCOUNTRY_TLDs = [\n    \".af\",\n    \".ax\",\n    \".al\",\n    \".dz\",\n    \".as\",\n    \".ad\",\n    \".ao\",\n    \".ai\",\n    \".aq\",\n    \".ag\",\n    \".ar\",\n    \".am\",\n    \".aw\",\n    \".ac\",\n    \".au\",\n    \".at\",\n    \".az\",\n    \".bs\",\n    \".bh\",\n    \".bd\",\n    \".bb\",\n    \".eus\",\n    \".by\",\n    \".be\",\n    \".bz\",\n    \".bj\",\n    \".bm\",\n    \".bt\",\n    \".bo\",\n    \".bq\",\".an\",\".nl\",\n    \".ba\",\n    \".bw\",\n    \".bv\",\n    \".br\",\n    \".io\",\n    \".vg\",\n    \".bn\",\n    \".bg\",\n    \".bf\",\n    \".mm\",\n    \".bi\",\n    \".kh\",\n    \".cm\",\n    \".ca\",\n    \".cv\",\n    \".cat\",\n    \".ky\",\n    \".cf\",\n    \".td\",\n    \".cl\",\n    \".cn\",\n    \".cx\",\n    \".cc\",\n    \".co\",\n    \".km\",\n    \".cd\",\n    \".cg\",\n    \".ck\",\n    \".cr\",\n    \".ci\",\n    \".hr\",\n    \".cu\",\n    \".cw\",\n    \".cy\",\n    \".cz\",\n    \".dk\",\n    \".dj\",\n    \".dm\",\n    \".do\",\n    \".tl\",\".tp\",\n    \".ec\",\n    \".eg\",\n    \".sv\",\n    \".gq\",\n    \".er\",\n    \".ee\",\n    \".et\",\n    \".eu\",\n    \".fk\",\n    \".fo\",\n    \".fm\",\n    \".fj\",\n    \".fi\",\n    \".fr\",\n    \".gf\",\n    \".pf\",\n    \".tf\",\n    \".ga\",\n    \".gal\",\n    \".gm\",\n    \".ps\",\n    \".ge\",\n    \".de\",\n    \".gh\",\n    \".gi\",\n    \".gr\",\n    \".gl\",\n    \".gd\",\n    \".gp\",\n    \".gu\",\n    \".gt\",\n    \".gg\",\n    \".gn\",\n    \".gw\",\n    \".gy\",\n    \".ht\",\n    \".hm\",\n    \".hn\",\n    \".hk\",\n    \".hu\",\n    \".is\",\n    \".in\",\n    \".id\",\n    \".ir\",\n    \".iq\",\n    
\".ie\",\n    \".im\",\n    \".il\",\n    \".it\",\n    \".jm\",\n    \".jp\",\n    \".je\",\n    \".jo\",\n    \".kz\",\n    \".ke\",\n    \".ki\",\n    \".kw\",\n    \".kg\",\n    \".la\",\n    \".lv\",\n    \".lb\",\n    \".ls\",\n    \".lr\",\n    \".ly\",\n    \".li\",\n    \".lt\",\n    \".lu\",\n    \".mo\",\n    \".mk\",\n    \".mg\",\n    \".mw\",\n    \".my\",\n    \".mv\",\n    \".ml\",\n    \".mt\",\n    \".mh\",\n    \".mq\",\n    \".mr\",\n    \".mu\",\n    \".yt\",\n    \".mx\",\n    \".md\",\n    \".mc\",\n    \".mn\",\n    \".me\",\n    \".ms\",\n    \".ma\",\n    \".mz\",\n    \".mm\",\n    \".na\",\n    \".nr\",\n    \".np\",\n    \".nl\",\n    \".nc\",\n    \".nz\",\n    \".ni\",\n    \".ne\",\n    \".ng\",\n    \".nu\",\n    \".nf\",\n    \".nc\",\".tr\",\n    \".kp\",\n    \".mp\",\n    \".no\",\n    \".om\",\n    \".pk\",\n    \".pw\",\n    \".ps\",\n    \".pa\",\n    \".pg\",\n    \".py\",\n    \".pe\",\n    \".ph\",\n    \".pn\",\n    \".pl\",\n    \".pt\",\n    \".pr\",\n    \".qa\",\n    \".ro\",\n    \".ru\",\n    \".rw\",\n    \".re\",\n    \".bq\",\".an\",\n    \".bl\",\".gp\",\".fr\",\n    \".sh\",\n    \".kn\",\n    \".lc\",\n    \".mf\",\".gp\",\".fr\",\n    \".pm\",\n    \".vc\",\n    \".ws\",\n    \".sm\",\n    \".st\",\n    \".sa\",\n    \".sn\",\n    \".rs\",\n    \".sc\",\n    \".sl\",\n    \".sg\",\n    \".bq\",\".an\",\".nl\",\n    \".sx\",\".an\",\n    \".sk\",\n    \".si\",\n    \".sb\",\n    \".so\",\n    \".so\",\n    \".za\",\n    \".gs\",\n    \".kr\",\n    \".ss\",\n    \".es\",\n    \".lk\",\n    \".sd\",\n    \".sr\",\n    \".sj\",\n    \".sz\",\n    \".se\",\n    \".ch\",\n    \".sy\",\n    \".tw\",\n    \".tj\",\n    \".tz\",\n    \".th\",\n    \".tg\",\n    \".tk\",\n    \".to\",\n    \".tt\",\n    \".tn\",\n    \".tr\",\n    \".tm\",\n    \".tc\",\n    \".tv\",\n    \".ug\",\n    \".ua\",\n    \".ae\",\n    \".uk\",\n    \".us\",\n    \".vi\",\n    \".uy\",\n    \".uz\",\n    \".vu\",\n    \".va\",\n    
\".ve\",\n    \".vn\",\n    \".wf\",\n    \".eh\",\n    \".ma\",\n    \".ye\",\n    \".zm\",\n    \".zw\"\n]\n\ndef check_domain_brand_inconsistency(logo_boxes,\n                                     domain_map_path: str,\n                                     model, logo_feat_list,\n                                     file_name_list, shot_path: str,\n                                     url: str, similarity_threshold: float,\n                                     topk: float = 3):\n    # targetlist domain list\n    with open(domain_map_path, 'rb') as handle:\n        domain_map = pickle.load(handle)\n\n    print('Number of logo boxes:', len(logo_boxes))\n    suffix_part = '.'+ tldextract.extract(url).suffix\n    domain_part = tldextract.extract(url).domain\n    extracted_domain = domain_part + suffix_part\n    matched_target, matched_domain, matched_coord, this_conf = None, None, None, None\n\n    if len(logo_boxes) > 0:\n        # siamese prediction for logo box\n        for i, coord in enumerate(logo_boxes):\n\n            if i == topk:\n                break\n\n            min_x, min_y, max_x, max_y = coord\n            bbox = [float(min_x), float(min_y), float(max_x), float(max_y)]\n            matched_target, matched_domain, this_conf = pred_brand(model, domain_map,\n                                                                   logo_feat_list, file_name_list,\n                                                                   shot_path, bbox,\n                                                                   similarity_threshold=similarity_threshold,\n                                                                   grayscale=False,\n                                                                   do_aspect_ratio_check=False,\n                                                                   do_resolution_alignment=False)\n\n            # print(target_this, domain_this, this_conf)\n            # domain matcher to avoid FP\n            if 
matched_target and matched_domain:\n                matched_coord = coord\n                matched_domain_parts = [tldextract.extract(x).domain for x in matched_domain]\n                matched_suffix_parts = [tldextract.extract(x).suffix for x in matched_domain]\n                \n                # If the webpage domain exactly aligns with the target website's domain => Benign\n                if extracted_domain in matched_domain:\n                    matched_target, matched_domain = None, None  # Clear if domains are consistent\n                elif domain_part in matched_domain_parts: # # elIf only the 2nd-level-domains align, and the tld is regional  => Benign\n                    if \".\" + suffix_part.split('.')[-1] in COUNTRY_TLDs:\n                        matched_target, matched_domain = None, None\n                    else:\n                        break # Inconsistent domain found, break the loop\n                else:\n                    break  # Inconsistent domain found, break the loop\n\n    return brand_converter(matched_target), matched_domain, matched_coord, this_conf\n\n\ndef load_model_weights(num_classes: int, weights_path: str):\n    '''\n    :param num_classes: number of protected brands\n    :param weights_path: siamese weights\n    :return model: siamese model\n    '''\n    # Initialize model\n    device = 'cuda' if torch.cuda.is_available() else 'cpu'\n    model = KNOWN_MODELS[\"BiT-M-R50x1\"](head_size=num_classes, zero_head=True)\n\n    # Load weights\n    weights = torch.load(weights_path, map_location='cpu')\n    weights = weights['model'] if 'model' in weights.keys() else weights\n    new_state_dict = OrderedDict()\n    for k, v in weights.items():\n        if 'module.' 
in k:\n            name = k.split('module.')[1]\n        else:\n            name = k\n        new_state_dict[name] = v\n\n    model.load_state_dict(new_state_dict)\n    model.to(device)\n    model.eval()\n    return model\n\n\ndef cache_reference_list(model, targetlist_path: str, grayscale=False):\n    '''\n    cache the embeddings of the reference list\n    :param targetlist_path: targetlist folder\n    :param grayscale: convert logo to grayscale or not, default is RGB\n    :return logo_feat_list: targetlist embeddings\n    :return file_name_list: targetlist paths\n    '''\n\n    # Prediction for targetlists\n    logo_feat_list = []\n    file_name_list = []\n\n    target_list = os.listdir(targetlist_path)\n    for target in tqdm(target_list):\n        if target.startswith('.'):  # skip hidden files\n            continue\n        logo_list = os.listdir(os.path.join(targetlist_path, target))\n        for logo_path in logo_list:\n            # List of valid image extensions\n            valid_extensions = ['.png', 'PNG', '.jpeg', '.jpg', '.JPG', '.JPEG']\n            if any(logo_path.endswith(ext) for ext in valid_extensions):\n                skip_prefixes = ['loginpage', 'homepage']\n                if any(logo_path.startswith(prefix) for prefix in skip_prefixes):  # skip homepage/loginpage\n                    continue\n                try:\n                    logo_feat_list.append(get_embedding(img=os.path.join(targetlist_path, target, logo_path),\n                                                        model=model, grayscale=grayscale))\n                    file_name_list.append(str(os.path.join(targetlist_path, target, logo_path)))\n                except OSError:\n                    print(f\"Error opening image: {os.path.join(targetlist_path, target, logo_path)}\")\n                    continue\n\n    return logo_feat_list, file_name_list\n\n\n@torch.no_grad()\ndef get_embedding(img, model, grayscale=False):\n    '''\n    Inference for a single image\n    
:param img: image path in str or image in PIL.Image\n    :param model: model to make inference\n    :param grayscale: convert image to grayscale or not\n    :return feature embedding of shape (2048,)\n    '''\n    #     img_size = 224\n    img_size = 128\n    mean = [0.5, 0.5, 0.5]\n    std = [0.5, 0.5, 0.5]\n    device = 'cuda' if torch.cuda.is_available() else 'cpu'\n\n    img_transforms = transforms.Compose(\n        [transforms.ToTensor(),\n         transforms.Normalize(mean=mean, std=std),\n         ])\n\n    img = Image.open(img) if isinstance(img, str) else img\n    img = img.convert(\"L\").convert(\"RGB\") if grayscale else img.convert(\"RGB\")\n\n    ## Resize the image while keeping the original aspect ratio\n    pad_color = 255 if grayscale else (255, 255, 255)\n    img = ImageOps.expand(\n        img,\n        (\n            (max(img.size) - img.size[0]) // 2,\n            (max(img.size) - img.size[1]) // 2,\n            (max(img.size) - img.size[0]) // 2,\n            (max(img.size) - img.size[1]) // 2\n        ),\n        fill=pad_color\n    )\n\n    img = img.resize((img_size, img_size))\n\n    # Predict the embedding\n    img = img_transforms(img)\n    img = img[None, ...].to(device)\n    logo_feat = model.features(img)\n    logo_feat = l2_norm(logo_feat).squeeze(0).cpu().numpy()  # L2-normalization final shape is (2048,)\n\n    return logo_feat\n\ndef chunked_dot(logo_feat_list, img_feat, chunk_size=128):\n    sim_list = []\n\n    for start in range(0, logo_feat_list.shape[0], chunk_size):\n        end = start + chunk_size\n        chunk = logo_feat_list[start:end]\n        sim_chunk = np.dot(chunk, img_feat.T)  # shape: (chunk_size, M)\n        sim_list.extend(sim_chunk)\n\n    return sim_list\n\ndef pred_brand(model, domain_map, logo_feat_list, file_name_list, shot_path: str, gt_bbox, similarity_threshold,\n               grayscale=False,\n               do_resolution_alignment=True,\n               do_aspect_ratio_check=True):\n    '''\n    
Return predicted brand for one cropped image\n    :param model: model to use\n    :param domain_map: brand-domain dictionary\n    :param logo_feat_list: reference logo feature embeddings\n    :param file_name_list: reference logo paths\n    :param shot_path: path to the screenshot\n    :param gt_bbox: 1x4 np.ndarray/list/tensor bounding box coords\n    :param similarity_threshold: similarity threshold for siamese\n    :param do_resolution_alignment: if the similarity does not exceed the threshold, do we align their resolutions to have a retry\n    :param do_aspect_ratio_check: once two logos are similar, whether we want to a further check on their aspect ratios\n    :param grayscale: convert image(cropped) to grayscale or not\n    :return: predicted target, predicted target's domain\n    '''\n\n    try:\n        img = Image.open(shot_path)\n    except OSError:  # if the image cannot be identified, return nothing\n        print('Screenshot cannot be open')\n        return None, None, None\n\n    # get predicted box --> crop from screenshot\n    cropped = img.crop((gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]))\n    img_feat = get_embedding(cropped, model, grayscale=grayscale)\n\n    # get cosine similarity with every protected logo\n    sim_list = chunked_dot(logo_feat_list, img_feat) # take dot product for every pair of embeddings (Cosine Similarity)\n    pred_brand_list = file_name_list\n\n    assert len(sim_list) == len(pred_brand_list)\n\n    # get top 3 brands\n    idx = np.argsort(sim_list)[::-1][:3]\n    pred_brand_list = np.array(pred_brand_list)[idx]\n    sim_list = np.array(sim_list)[idx]\n\n    # top1,2,3 candidate logos\n    top3_brandlist = [brand_converter(os.path.basename(os.path.dirname(x))) for x in pred_brand_list]\n    top3_domainlist = [domain_map[x] for x in top3_brandlist]\n    top3_simlist = sim_list\n\n    for j in range(3):\n        predicted_brand, predicted_domain = None, None\n\n        # If we are trying those lower rank logo, the 
predicted brand of them should be the same as top1 logo, otherwise might be false positive\n        if top3_brandlist[j] != top3_brandlist[0]:\n            continue\n\n        # If the largest similarity exceeds threshold\n        if top3_simlist[j] >= similarity_threshold:\n            predicted_brand = top3_brandlist[j]\n            predicted_domain = top3_domainlist[j]\n            final_sim = top3_simlist[j]\n\n        # Else if not exceed, try resolution alignment, see if can improve\n        elif do_resolution_alignment:\n            orig_candidate_logo = Image.open(pred_brand_list[j])\n            cropped, candidate_logo = resolution_alignment(cropped, orig_candidate_logo)\n            img_feat = get_embedding(cropped, model, grayscale=grayscale)\n            logo_feat = get_embedding(candidate_logo, model, grayscale=grayscale)\n            final_sim = logo_feat.dot(img_feat)\n            if final_sim >= similarity_threshold:\n                predicted_brand = top3_brandlist[j]\n                predicted_domain = top3_domainlist[j]\n            else:\n                break  # no hope, do not try other lower rank logos\n\n        ## If there is a prediction, do aspect ratio check\n        if predicted_brand is not None:\n            if do_aspect_ratio_check:\n                orig_candidate_logo = Image.open(pred_brand_list[j])\n                ratio_crop = cropped.size[0] / cropped.size[1]\n                ratio_logo = orig_candidate_logo.size[0] / orig_candidate_logo.size[1]\n                # aspect ratios of matched pair must not deviate by more than factor of 2.5\n                if max(ratio_crop, ratio_logo) / min(ratio_crop, ratio_logo) > 2.5:\n                    continue  # did not pass aspect ratio check, try other\n            return predicted_brand, predicted_domain, final_sim\n\n    return None, None, top3_simlist[0]\n"
  },
  {
    "path": "logo_recog.py",
    "content": "from detectron2.config import get_cfg\nfrom detectron2.engine import DefaultPredictor\nimport cv2\nimport numpy as np\nimport torch\n\n\ndef pred_rcnn(im, predictor):\n    '''\n    Perform inference for RCNN\n    :param im:\n    :param predictor:\n    :return:\n    '''\n    im = cv2.imread(im)\n\n    if im is not None:\n        if im.shape[-1] == 4:\n            im = cv2.cvtColor(im, cv2.COLOR_BGRA2BGR)\n    else:\n        print(f\"Image at path {im} is None\")\n        return None\n\n    outputs = predictor(im)\n\n    instances = outputs['instances']\n    pred_classes = instances.pred_classes  # tensor\n    pred_boxes = instances.pred_boxes  # Boxes object\n\n    logo_boxes = pred_boxes[pred_classes == 1].tensor\n\n    return logo_boxes\n\n\ndef config_rcnn(cfg_path, weights_path, conf_threshold):\n    '''\n    Configure weights and confidence threshold\n    :param cfg_path:\n    :param weights_path:\n    :param conf_threshold:\n    :return:\n    '''\n    cfg = get_cfg()\n    cfg.merge_from_file(cfg_path)\n    cfg.MODEL.WEIGHTS = weights_path\n    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = conf_threshold\n    # uncomment if you installed detectron2 cpu version\n    if not torch.cuda.is_available():\n        cfg.MODEL.DEVICE = 'cpu'\n\n    # Initialize model\n    predictor = DefaultPredictor(cfg)\n    return predictor\n\n\nCOLORS = {\n    0: (255, 255, 0),  # logo\n    1: (36, 255, 12),  # input\n    2: (0, 255, 255),  # button\n    3: (0, 0, 255),  # label\n    4: (255, 0, 0)  # block\n}\n\n\ndef vis(img_path, pred_boxes):\n    '''\n    Visualize rcnn predictions\n    :param img_path: str\n    :param pred_boxes: torch.Tensor of shape Nx4, bounding box coordinates in (x1, y1, x2, y2)\n    :param pred_classes: torch.Tensor of shape Nx1 0 for logo, 1 for input, 2 for button, 3 for label(text near input), 4 for block\n    :return None\n    '''\n\n    check = cv2.imread(img_path)\n    if pred_boxes is None or len(pred_boxes) == 0:\n        
print(\"Pred_boxes is None or the length of pred_boxes is 0\")\n        return check\n    pred_boxes = pred_boxes.numpy() if not isinstance(pred_boxes, np.ndarray) else pred_boxes\n\n    # draw rectangle\n    for j, box in enumerate(pred_boxes):\n        if j == 0:\n            cv2.rectangle(check, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), COLORS[0], 2)\n        else:\n            cv2.rectangle(check, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), COLORS[1], 2)\n\n    return check\n"
  },
  {
    "path": "models.py",
    "content": "# Copyright 2020 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#            http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n# Lint as: python3\n\"\"\"Bottleneck ResNet v2 with GroupNorm and Weight Standardization.\"\"\"\n\nfrom collections import OrderedDict  # pylint: disable=g-importing-member\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass StdConv2d(nn.Conv2d):\n\n    def forward(self, x):\n        w = self.weight\n        v, m = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)\n        w = (w - m) / torch.sqrt(v + 1e-10)\n        return F.conv2d(x, w, self.bias, self.stride, self.padding,\n                        self.dilation, self.groups)\n\n\ndef conv3x3(cin, cout, stride=1, groups=1, bias=False):\n    return StdConv2d(cin, cout, kernel_size=3, stride=stride,\n                     padding=1, bias=bias, groups=groups)\n\n\ndef conv1x1(cin, cout, stride=1, bias=False):\n    return StdConv2d(cin, cout, kernel_size=1, stride=stride,\n                     padding=0, bias=bias)\n\n\ndef tf2th(conv_weights):\n    \"\"\"Possibly convert HWIO to OIHW.\"\"\"\n    if conv_weights.ndim == 4:\n        conv_weights = conv_weights.transpose([3, 2, 0, 1])\n    return torch.from_numpy(conv_weights)\n\n\nclass PreActBottleneck(nn.Module):\n    \"\"\"Pre-activation (v2) bottleneck block.\n\n    Follows the implementation of \"Identity Mappings in Deep Residual Networks\":\n    
https://github.com/KaimingHe/resnet-1k-layers/blob/master/resnet-pre-act.lua\n\n    Except it puts the stride on 3x3 conv when available.\n    \"\"\"\n\n    def __init__(self, cin, cout=None, cmid=None, stride=1):\n        super().__init__()\n        cout = cout or cin\n        cmid = cmid or cout // 4\n\n        self.gn1 = nn.GroupNorm(32, cin)\n        self.conv1 = conv1x1(cin, cmid)\n        self.gn2 = nn.GroupNorm(32, cmid)\n        self.conv2 = conv3x3(cmid, cmid, stride)  # Original code has it on conv1!!\n        self.gn3 = nn.GroupNorm(32, cmid)\n        self.conv3 = conv1x1(cmid, cout)\n        self.relu = nn.ReLU(inplace=True)\n\n        if (stride != 1 or cin != cout):\n            # Projection also with pre-activation according to paper.\n            self.downsample = conv1x1(cin, cout, stride)\n\n    def forward(self, x):\n        out = self.relu(self.gn1(x))\n\n        # Residual branch\n        residual = x\n        if hasattr(self, 'downsample'):\n            residual = self.downsample(out)\n\n        # Unit's branch\n        out = self.conv1(out)\n        out = self.conv2(self.relu(self.gn2(out)))\n        out = self.conv3(self.relu(self.gn3(out)))\n\n        return out + residual\n\n    def load_from(self, weights, prefix=''):\n        convname = 'standardized_conv2d'\n        with torch.no_grad():\n            self.conv1.weight.copy_(tf2th(weights[f'{prefix}a/{convname}/kernel']))\n            self.conv2.weight.copy_(tf2th(weights[f'{prefix}b/{convname}/kernel']))\n            self.conv3.weight.copy_(tf2th(weights[f'{prefix}c/{convname}/kernel']))\n            self.gn1.weight.copy_(tf2th(weights[f'{prefix}a/group_norm/gamma']))\n            self.gn2.weight.copy_(tf2th(weights[f'{prefix}b/group_norm/gamma']))\n            self.gn3.weight.copy_(tf2th(weights[f'{prefix}c/group_norm/gamma']))\n            self.gn1.bias.copy_(tf2th(weights[f'{prefix}a/group_norm/beta']))\n            self.gn2.bias.copy_(tf2th(weights[f'{prefix}b/group_norm/beta']))\n  
          self.gn3.bias.copy_(tf2th(weights[f'{prefix}c/group_norm/beta']))\n            if hasattr(self, 'downsample'):\n                w = weights[f'{prefix}a/proj/{convname}/kernel']\n                self.downsample.weight.copy_(tf2th(w))\n\n\nclass ResNetV2(nn.Module):\n    \"\"\"Implementation of Pre-activation (v2) ResNet mode.\"\"\"\n\n    def __init__(self, block_units, width_factor, head_size=21843, zero_head=False):\n        super().__init__()\n        wf = width_factor  # shortcut 'cause we'll use it a lot.\n\n        # The following will be unreadable if we split lines.\n        # pylint: disable=line-too-long\n        self.root = nn.Sequential(OrderedDict([\n            ('conv', StdConv2d(3, 64 * wf, kernel_size=7, stride=2, padding=3, bias=False)),\n            ('pad', nn.ConstantPad2d(1, 0)),\n            ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)),\n            # The following is subtly not the same!\n            # ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),\n        ]))\n\n        self.body = nn.Sequential(OrderedDict([\n            ('block1', nn.Sequential(OrderedDict(\n                [('unit01', PreActBottleneck(cin=64 * wf, cout=256 * wf, cmid=64 * wf))] +\n                [(f'unit{i:02d}', PreActBottleneck(cin=256 * wf, cout=256 * wf, cmid=64 * wf)) for i in\n                 range(2, block_units[0] + 1)],\n            ))),\n            ('block2', nn.Sequential(OrderedDict(\n                [('unit01', PreActBottleneck(cin=256 * wf, cout=512 * wf, cmid=128 * wf, stride=2))] +\n                [(f'unit{i:02d}', PreActBottleneck(cin=512 * wf, cout=512 * wf, cmid=128 * wf)) for i in\n                 range(2, block_units[1] + 1)],\n            ))),\n            ('block3', nn.Sequential(OrderedDict(\n                [('unit01', PreActBottleneck(cin=512 * wf, cout=1024 * wf, cmid=256 * wf, stride=2))] +\n                [(f'unit{i:02d}', PreActBottleneck(cin=1024 * wf, cout=1024 * wf, cmid=256 * wf)) for i in\n       
          range(2, block_units[2] + 1)],\n            ))),\n            ('block4', nn.Sequential(OrderedDict(\n                [('unit01', PreActBottleneck(cin=1024 * wf, cout=2048 * wf, cmid=512 * wf, stride=2))] +\n                [(f'unit{i:02d}', PreActBottleneck(cin=2048 * wf, cout=2048 * wf, cmid=512 * wf)) for i in\n                 range(2, block_units[3] + 1)],\n            ))),\n        ]))\n        # pylint: enable=line-too-long\n\n        self.zero_head = zero_head\n        self.head = nn.Sequential(OrderedDict([\n            ('gn', nn.GroupNorm(32, 2048 * wf)),\n            ('relu', nn.ReLU(inplace=True)),\n            ('avg', nn.AdaptiveAvgPool2d(output_size=1)),\n            ('conv', nn.Conv2d(2048 * wf, head_size, kernel_size=1, bias=True)),\n        ]))\n\n    def features(self, x):\n        x = self.head[:-1](self.body(self.root(x)))\n\n        return x.squeeze(-1).squeeze(-1)\n\n    def forward(self, x):\n        x = self.head(self.body(self.root(x)))\n        assert x.shape[-2:] == (1, 1)  # We should have no spatial shape left.\n        return x[..., 0, 0]\n\n    def load_from(self, weights, prefix='resnet/'):\n        with torch.no_grad():\n            self.root.conv.weight.copy_(\n                tf2th(weights[f'{prefix}root_block/standardized_conv2d/kernel']))  # pylint: disable=line-too-long\n            self.head.gn.weight.copy_(tf2th(weights[f'{prefix}group_norm/gamma']))\n            self.head.gn.bias.copy_(tf2th(weights[f'{prefix}group_norm/beta']))\n            if self.zero_head:\n                nn.init.zeros_(self.head.conv.weight)\n                nn.init.zeros_(self.head.conv.bias)\n            else:\n                self.head.conv.weight.copy_(\n                    tf2th(weights[f'{prefix}head/conv2d/kernel']))  # pylint: disable=line-too-long\n                self.head.conv.bias.copy_(tf2th(weights[f'{prefix}head/conv2d/bias']))\n\n            for bname, block in self.body.named_children():\n                for uname, unit in 
block.named_children():\n                    unit.load_from(weights, prefix=f'{prefix}{bname}/{uname}/')\n\n\nKNOWN_MODELS = OrderedDict([\n    ('BiT-M-R50x1', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)),\n    ('BiT-M-R50x3', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)),\n    ('BiT-M-R101x1', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)),\n    ('BiT-M-R101x3', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)),\n    ('BiT-M-R152x2', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)),\n    ('BiT-M-R152x4', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)),\n    ('BiT-S-R50x1', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 1, *a, **kw)),\n    ('BiT-S-R50x3', lambda *a, **kw: ResNetV2([3, 4, 6, 3], 3, *a, **kw)),\n    ('BiT-S-R101x1', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 1, *a, **kw)),\n    ('BiT-S-R101x3', lambda *a, **kw: ResNetV2([3, 4, 23, 3], 3, *a, **kw)),\n    ('BiT-S-R152x2', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 2, *a, **kw)),\n    ('BiT-S-R152x4', lambda *a, **kw: ResNetV2([3, 8, 36, 3], 4, *a, **kw)),\n])\n"
  },
  {
    "path": "phishpedia.py",
    "content": "import time\nfrom datetime import datetime\nimport argparse\nimport os\nimport torch\nimport cv2\nfrom configs import load_config\nfrom logo_recog import pred_rcnn, vis\nfrom logo_matching import check_domain_brand_inconsistency\nfrom tqdm import tqdm\n\nimport re\n\nos.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n\n\ndef result_file_write(f, folder, url, phish_category, pred_target, matched_domain, siamese_conf, logo_recog_time,\n                      logo_match_time):\n    f.write(folder + \"\\t\")\n    f.write(url + \"\\t\")\n    f.write(str(phish_category) + \"\\t\")\n    f.write(str(pred_target) + \"\\t\")  # write top1 prediction only\n    f.write(str(matched_domain) + \"\\t\")\n    f.write(str(siamese_conf) + \"\\t\")\n    f.write(str(round(logo_recog_time, 4)) + \"\\t\")\n    f.write(str(round(logo_match_time, 4)) + \"\\n\")\n\n\nclass PhishpediaWrapper:\n    _caller_prefix = \"PhishpediaWrapper\"\n    _DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'\n\n    def __init__(self):\n        self._load_config()\n\n    def _load_config(self):\n        self.ELE_MODEL, self.SIAMESE_THRE, self.SIAMESE_MODEL, \\\n            self.LOGO_FEATS, self.LOGO_FILES, \\\n            self.DOMAIN_MAP_PATH = load_config()\n        print(f'Length of reference list = {len(self.LOGO_FEATS)}')\n\n    def test_orig_phishpedia(self, url, screenshot_path, html_path):\n        # 0 for benign, 1 for phish, default is benign\n        phish_category = 0\n        pred_target = None\n        matched_domain = None\n        siamese_conf = None\n        plotvis = None\n        logo_match_time = 0\n        print(\"Entering phishpedia\")\n\n        ####################### Step1: Logo detector ##############################################\n        start_time = time.time()\n        pred_boxes = pred_rcnn(im=screenshot_path, predictor=self.ELE_MODEL)\n        logo_recog_time = time.time() - start_time\n\n        if pred_boxes is not None:\n            pred_boxes = 
pred_boxes.detach().cpu().numpy()\n        plotvis = vis(screenshot_path, pred_boxes)\n\n        # If no element is reported\n        if pred_boxes is None or len(pred_boxes) == 0:\n            print('No logo is detected')\n            return phish_category, pred_target, matched_domain, plotvis, siamese_conf, pred_boxes, logo_recog_time, logo_match_time\n\n        ######################## Step2: Siamese (Logo matcher) ########################################\n        start_time = time.time()\n        pred_target, matched_domain, matched_coord, siamese_conf = check_domain_brand_inconsistency(\n            logo_boxes=pred_boxes,\n            domain_map_path=self.DOMAIN_MAP_PATH,\n            model=self.SIAMESE_MODEL,\n            logo_feat_list=self.LOGO_FEATS,\n            file_name_list=self.LOGO_FILES,\n            url=url,\n            shot_path=screenshot_path,\n            similarity_threshold=self.SIAMESE_THRE,\n            topk=1)\n        logo_match_time = time.time() - start_time\n\n        if pred_target is None:\n            print('Did not match to any brand, report as benign')\n            return phish_category, pred_target, matched_domain, plotvis, siamese_conf, pred_boxes, logo_recog_time, logo_match_time\n\n        print('Match to Target: {} with confidence {:.4f}'.format(pred_target, siamese_conf))\n        phish_category = 1\n        # Visualize, add annotations\n        cv2.putText(plotvis, \"Target: {} with confidence {:.4f}\".format(pred_target, siamese_conf),\n                    (int(matched_coord[0] + 20), int(matched_coord[1] + 20)),\n                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)\n\n        return phish_category, pred_target, matched_domain, plotvis, siamese_conf, pred_boxes, logo_recog_time, logo_match_time\n\n\nif __name__ == '__main__':\n\n    '''run'''\n    today = datetime.now().strftime('%Y%m%d')\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--folder\", required=True, type=str)\n    
parser.add_argument(\"--output_txt\", default=f'{today}_results.txt', help=\"Output txt path\")\n    args = parser.parse_args()\n\n    request_dir = args.folder\n    phishpedia_cls = PhishpediaWrapper()\n    result_txt = args.output_txt\n\n    os.makedirs(request_dir, exist_ok=True)\n\n    for folder in tqdm(os.listdir(request_dir)):\n        html_path = os.path.join(request_dir, folder, \"html.txt\")\n        screenshot_path = os.path.join(request_dir, folder, \"shot.png\")\n        info_path = os.path.join(request_dir, folder, 'info.txt')\n\n        if not os.path.exists(screenshot_path):\n            continue\n        if not os.path.exists(html_path):\n            html_path = os.path.join(request_dir, folder, \"index.html\")\n\n        with open(info_path, 'r') as file:\n            url = file.read()\n        \n        if os.path.exists(result_txt):\n            with open(result_txt, 'r', encoding='ISO-8859-1') as file:\n                if url in file.read():\n                    continue\n\n        _forbidden_suffixes = r\"\\.(mp3|wav|wma|ogg|mkv|zip|tar|xz|rar|z|deb|bin|iso|csv|tsv|dat|txt|css|log|xml|sql|mdb|apk|bat|exe|jar|wsf|fnt|fon|otf|ttf|ai|bmp|gif|ico|jp(e)?g|png|ps|psd|svg|tif|tiff|cer|rss|key|odp|pps|ppt|pptx|c|class|cpp|cs|h|java|sh|swift|vb|odf|xlr|xls|xlsx|bak|cab|cfg|cpl|cur|dll|dmp|drv|icns|ini|lnk|msi|sys|tmp|3g2|3gp|avi|flv|h264|m4v|mov|mp4|mp(e)?g|rm|swf|vob|wmv|doc(x)?|odt|rtf|tex|wks|wps|wpd)$\"\n        if re.search(_forbidden_suffixes, url, re.IGNORECASE):\n            continue\n\n        phish_category, pred_target, matched_domain, \\\n            plotvis, siamese_conf, pred_boxes, \\\n            logo_recog_time, logo_match_time = phishpedia_cls.test_orig_phishpedia(url, screenshot_path, html_path)\n\n        try:\n            with open(result_txt, \"a+\", encoding='ISO-8859-1') as f:\n                result_file_write(f, folder, url, phish_category, pred_target, matched_domain, siamese_conf,\n                                  
logo_recog_time, logo_match_time)\n        except UnicodeError:\n            with open(result_txt, \"a+\", encoding='utf-8') as f:\n                result_file_write(f, folder, url, phish_category, pred_target, matched_domain, siamese_conf,\n                                  logo_recog_time, logo_match_time)\n        if phish_category:\n            os.makedirs(os.path.join(request_dir, folder), exist_ok=True)\n            cv2.imwrite(os.path.join(request_dir, folder, \"predict.png\"), plotvis)\n    \n"
  },
  {
    "path": "pixi.toml",
    "content": "[project]\nname = \"phishpedia\"\nchannels = [\"conda-forge\"]\nplatforms = [\"osx-arm64\", \"linux-64\", \"win-64\"]\n\n[dependencies]\npython = \">=3.8\"\npip = \"*\"\nsetuptools = \"*\"\nwheel = \"*\"\nnumpy = \"1.23.0\"\nrequests = \"*\"\nscikit-learn = \"*\"\nspacy = \"*\"\nbeautifulsoup4 = \"*\"\nmatplotlib = \"*\"\npandas = \"*\"\nnltk = \"*\"\ntqdm = \"*\"\nunidecode = \"*\"\ngdown = \"*\"\ntldextract = \"*\"\nscipy = \"*\"\npathlib = \"*\"\nfvcore = \"*\"\nlxml = \"*\"\npsutil = \"*\"\nPillow  = \"8.4.0\"\n\n\n[pypi-dependencies]\n\"flask\" = \"*\"\n\"flask-cors\" = \"*\"\n\"pycocotools\" = \"*\"\n\"opencv-python\"= \"*\"\n\"opencv-contrib-python\"= \"*\"\ntorch = { version = \">=1.9.0\", index = \"https://download.pytorch.org/whl/cpu\" }\ntorchvision = { version = \">=0.10.0\", index = \"https://download.pytorch.org/whl/cpu\" }\n\n"
  },
  {
    "path": "setup.bat",
    "content": "@echo off\nsetlocal enabledelayedexpansion\n\n:: ------------------------------------------------------------------------------\n:: Initialization and Logging\n:: ------------------------------------------------------------------------------\necho [%DATE% %TIME%] Starting setup...\n\n:: ------------------------------------------------------------------------------\n:: Tool Checks\n:: ------------------------------------------------------------------------------\nwhere pixi >nul 2>nul || (\n    echo [ERROR] pixi not found. Please install Pixi.\n    exit /b 1\n)\nwhere gdown >nul 2>nul || (\n    echo [ERROR] gdown not found. Please install gdown (via pixi).\n    exit /b 1\n)\nwhere unzip >nul 2>nul || (\n    echo [ERROR] unzip not found. Please install unzip utility.\n    exit /b 1\n)\n\n:: ------------------------------------------------------------------------------\n:: Setup Directories\n:: ------------------------------------------------------------------------------\nset \"FILEDIR=%cd%\"\nset \"MODELS_DIR=%FILEDIR%\\models\"\nif not exist \"%MODELS_DIR%\" mkdir \"%MODELS_DIR%\"\ncd /d \"%MODELS_DIR%\"\n\n:: ------------------------------------------------------------------------------\n:: Install Detectron2\n:: ------------------------------------------------------------------------------\necho [%DATE% %TIME%] Installing detectron2...\npixi run pip install --no-build-isolation git+https://github.com/facebookresearch/detectron2.git || (\n    echo [ERROR] Failed to install detectron2.\n    exit /b 1\n)\n\n:: ------------------------------------------------------------------------------\n:: File Metadata\n:: ------------------------------------------------------------------------------\nset RETRY_COUNT=3\n\n:: Model files and Google Drive IDs\nset file1=rcnn_bet365.pth\nset id1=1tE2Mu5WC8uqCxei3XqAd7AWaP5JTmVWH\n\nset file2=faster_rcnn.yaml\nset id2=1Q6lqjpl4exW7q_dPbComcj0udBMDl8CW\n\nset file3=resnetv2_rgb_new.pth.tar\nset 
id3=1H0Q_DbdKPLFcZee8I14K62qV7TTy7xvS\n\nset file4=expand_targetlist.zip\nset id4=1fr5ZxBKyDiNZ_1B6rRAfZbAHBBoUjZ7I\n\nset file5=domain_map.pkl\nset id5=1qSdkSSoCYUkZMKs44Rup_1DPBxHnEKl1\n\n:: ------------------------------------------------------------------------------\n:: Download Loop\n:: ------------------------------------------------------------------------------\nfor /L %%i in (1,1,5) do (\n    call set \"FILENAME=%%file%%i%%\"\n    call set \"FILEID=%%id%%i%%\"\n\n    if exist \"!FILENAME!\" (\n        echo [INFO] !FILENAME! already exists. Skipping.\n    ) else (\n        set /A count=1\n        :retry_%%i\n        echo [%DATE% %TIME%] Downloading !FILENAME! (Attempt !count!/%RETRY_COUNT%)...\n        pixi run gdown --id !FILEID! -O \"!FILENAME!\" && goto downloaded_%%i\n\n        set /A count+=1\n        if !count! LEQ %RETRY_COUNT% (\n            timeout /t 2 >nul\n            goto retry_%%i\n        ) else (\n            echo [ERROR] Failed to download !FILENAME! after %RETRY_COUNT% attempts.\n            exit /b 1\n        )\n        :downloaded_%%i\n    )\n)\n\n:: ------------------------------------------------------------------------------\n:: Extraction\n:: ------------------------------------------------------------------------------\necho [%DATE% %TIME%] Extracting expand_targetlist.zip...\nunzip -o expand_targetlist.zip -d expand_targetlist || (\n    echo [ERROR] Failed to unzip file.\n    exit /b 1\n)\n\n:: Flatten nested folder if necessary\ncd expand_targetlist\nif exist expand_targetlist\\ (\n    echo [INFO] Flattening nested expand_targetlist directory...\n    move expand_targetlist\\*.* . >nul\n    rmdir expand_targetlist\n)\n\n:: ------------------------------------------------------------------------------\n:: Done\n:: ------------------------------------------------------------------------------\necho [%DATE% %TIME%] [SUCCESS] Model setup and extraction complete.\nendlocal\n"
  },
  {
    "path": "setup.sh",
    "content": "#!/bin/bash\n\nset -euo pipefail  # Safer bash behavior\nIFS=$'\\n\\t'\n\n# Install Detectron2\npixi run pip install --no-build-isolation git+https://github.com/facebookresearch/detectron2.git\n\n# Set up model directory\nFILEDIR=\"$(pwd)\"\nMODELS_DIR=\"$FILEDIR/models\"\nmkdir -p \"$MODELS_DIR\"\ncd \"$MODELS_DIR\"\n\n# Download model files\npixi run gdown --id \"1tE2Mu5WC8uqCxei3XqAd7AWaP5JTmVWH\" -O \"rcnn_bet365.pth\"\npixi run gdown --id \"1Q6lqjpl4exW7q_dPbComcj0udBMDl8CW\" -O \"faster_rcnn.yaml\"\npixi run gdown --id \"1H0Q_DbdKPLFcZee8I14K62qV7TTy7xvS\" -O \"resnetv2_rgb_new.pth.tar\"\npixi run gdown --id \"1fr5ZxBKyDiNZ_1B6rRAfZbAHBBoUjZ7I\" -O \"expand_targetlist.zip\"\npixi run gdown --id \"1qSdkSSoCYUkZMKs44Rup_1DPBxHnEKl1\" -O \"domain_map.pkl\"\n\n# Extract and flatten expand_targetlist\necho \"Extracting expand_targetlist.zip...\"\nunzip -o expand_targetlist.zip -d expand_targetlist\n\ncd expand_targetlist || error_exit \"Extraction directory missing.\"\n\nif [ -d \"expand_targetlist\" ]; then\n  echo \"Flattening nested expand_targetlist/ directory...\"\n  mv expand_targetlist/* .\n  rm -r expand_targetlist\nfi\n\necho \"Model setup and extraction complete.\"\n"
  },
  {
    "path": "utils.py",
    "content": "import torch.nn.functional as F\nimport math\n\n\ndef resolution_alignment(img1, img2):\n    '''\n    Resize two images according to the minimum resolution between the two\n    :param img1: first image in PIL.Image\n    :param img2: second image in PIL.Image\n    :return: resized img1 in PIL.Image, resized img2 in PIL.Image\n    '''\n    w1, h1 = img1.size\n    w2, h2 = img2.size\n    w_min, h_min = min(w1, w2), min(h1, h2)\n    if w_min == 0 or h_min == 0:  # something wrong, stop resizing\n        return img1, img2\n    if w_min < h_min:\n        img1_resize = img1.resize((int(w_min), math.ceil(h1 * (w_min / w1))))  # ceiling to prevent rounding to 0\n        img2_resize = img2.resize((int(w_min), math.ceil(h2 * (w_min / w2))))\n    else:\n        img1_resize = img1.resize((math.ceil(w1 * (h_min / h1)), int(h_min)))\n        img2_resize = img2.resize((math.ceil(w2 * (h_min / h2)), int(h_min)))\n    return img1_resize, img2_resize\n\n\ndef brand_converter(brand_name):\n    '''\n    Helper function to deal with inconsistency in brand naming\n    '''\n    brand_tran_dict = {'Adobe Inc.': 'Adobe', 'Adobe Inc': 'Adobe',\n                       'ADP, LLC': 'ADP', 'ADP, LLC.': 'ADP',\n                       'Amazon.com Inc.': 'Amazon', 'Amazon.com Inc': 'Amazon',\n                       'Americanas.com S,A Comercio Electrnico': 'Americanas.com S',\n                       'AOL Inc.': 'AOL', 'AOL Inc': 'AOL',\n                       'Apple Inc.': 'Apple', 'Apple Inc': 'Apple',\n                       'AT&T Inc.': 'AT&T', 'AT&T Inc': 'AT&T',\n                       'Banco do Brasil S.A.': 'Banco do Brasil S.A',\n                       'Credit Agricole S.A.': 'Credit Agricole S.A',\n                       'DGI (French Tax Authority)': 'DGI French Tax Authority',\n                       'DHL Airways, Inc.': 'DHL Airways', 'DHL Airways, Inc': 'DHL Airways', 'DHL': 'DHL Airways',\n                       'Dropbox, Inc.': 'Dropbox', 'Dropbox, Inc': 'Dropbox',\n   
                    'eBay Inc.': 'eBay', 'eBay Inc': 'eBay',\n                       'Facebook, Inc.': 'Facebook', 'Facebook, Inc': 'Facebook',\n                       'Free (ISP)': 'Free ISP',\n                       'Google Inc.': 'Google', 'Google Inc': 'Google',\n                       'Mastercard International Incorporated': 'Mastercard International',\n                       'Netflix Inc.': 'Netflix', 'Netflix Inc': 'Netflix',\n                       'PayPal Inc.': 'PayPal', 'PayPal Inc': 'PayPal',\n                       'Royal KPN N.V.': 'Royal KPN N.V',\n                       'SF Express Co.': 'SF Express Co',\n                       'SNS Bank N.V.': 'SNS Bank N.V',\n                       'Square, Inc.': 'Square', 'Square, Inc': 'Square',\n                       'Webmail Providers': 'Webmail Provider',\n                       'Yahoo! Inc': 'Yahoo!', 'Yahoo! Inc.': 'Yahoo!',\n                       'Microsoft OneDrive': 'Microsoft', 'Office365': 'Microsoft', 'Outlook': 'Microsoft',\n                       'Global Sources (HK)': 'Global Sources HK',\n                       'T-Online': 'Deutsche Telekom',\n                       'Airbnb, Inc': 'Airbnb, Inc.',\n                       'azul': 'Azul',\n                       'Raiffeisen Bank S.A': 'Raiffeisen Bank S.A.',\n                       'Twitter, Inc': 'Twitter, Inc.', 'Twitter': 'Twitter, Inc.',\n                       'capital_one': 'Capital One Financial Corporation',\n                       'la_banque_postale': 'La Banque postale',\n                       'db': 'Deutsche Bank AG',\n                       'Swiss Post': 'PostFinance', 'PostFinance': 'PostFinance',\n                       'grupo_bancolombia': 'Bancolombia',\n                       'barclays': 'Barclays Bank Plc',\n                       'gov_uk': 'Government of the United Kingdom',\n                       'Aruba S.p.A': 'Aruba S.p.A.',\n                       'TSB Bank Plc': 'TSB Bank Limited',\n                       'strato': 
'Strato AG',\n                       'cogeco': 'Cogeco',\n                       'Canada Revenue Agency': 'Government of Canada',\n                       'UniCredit Bulbank': 'UniCredit Bank Aktiengesellschaft',\n                       'ameli_fr': 'French Health Insurance',\n                       'Banco de Credito del Peru': 'bcp'\n                       }\n    # find the value in the dict else return the origin brand name\n    tran_brand_name = brand_tran_dict.get(brand_name, None)\n    if tran_brand_name:\n        return tran_brand_name\n    else:\n        return brand_name\n\n\ndef l2_norm(x):\n    \"\"\"\n    l2 normalization\n    :param x:\n    :return:\n    \"\"\"\n    if len(x.shape):\n        x = x.reshape((x.shape[0], -1))\n    return F.normalize(x, p=2, dim=1)\n"
  }
]